diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 16379b06..7e74d9f0 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -34,6 +34,8 @@ variables:
     - cib_properties_one_set
     - cib_resources_create
     - cib_rsc_op_defaults
+    - cib_stonith_levels_validation
+    - cib_stonith_levels
     - cluster_advanced_knet_full
     - cluster_advanced_knet_implicit
     - cluster_advanced_udp_full
@@ -203,10 +205,19 @@ unit_tests:
     - PYTHONPATH="./library:./module_utils:$PYTHONPATH" python -m unittest --verbose tests/unit/*.py

 # tier 1
+# RHEL 8.4 with the newest compatible Ansible doesn't work with the new
+# Ansible Galaxy deployed on 2023-09-30. It does work with the old one, so we
+# point it at the old server using the --server option.
+# references:
+# https://www.ansible.com/blog/new-ansible-galaxy
+# https://forum.ansible.com/t/new-ansible-galaxy/1155/20
+# https://github.com/ansible/awx/issues/14496#issuecomment-1743711473
 .role_test:
   stage: tier1
   script:
-    - ansible-galaxy -vv collection install -r ./meta/collection-requirements.yml
+    - if [ "x${BASE_IMAGE_NAME}" == "xLsrRhel8OldestAnsibleCurrent" ]; then GALAXY_SERVER="https://old-galaxy.ansible.com"; fi
+    - echo "$GALAXY_SERVER"
+    - ansible-galaxy collection install -vvv "--server=${GALAXY_SERVER}" -r ./meta/collection-requirements.yml
    - varsparams=""
     - TEST_FILE_NAME="tests_${TEST_FILE}.yml"
     - echo "$TEST_FILE_NAME"
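A note on the install step above: when `BASE_IMAGE_NAME` does not match, `GALAXY_SERVER` stays empty and the quoted flag expands to a bare `--server=`, which ansible-galaxy is unlikely to accept. Assuming no default is defined elsewhere in the CI variables (the full file is not shown here), a sketch that only passes the flag when a server was actually selected:

```yaml
.role_test:
  stage: tier1
  script:
    - if [ "x${BASE_IMAGE_NAME}" == "xLsrRhel8OldestAnsibleCurrent" ]; then GALAXY_SERVER="https://old-galaxy.ansible.com"; fi
    # ${VAR:+word} expands to nothing when VAR is unset or empty, so the
    # --server flag is omitted entirely unless a server was chosen above.
    - ansible-galaxy collection install -vvv ${GALAXY_SERVER:+--server="$GALAXY_SERVER"} -r ./meta/collection-requirements.yml
```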
diff --git a/README.md b/README.md
index 4f907006..447443c7 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ An Ansible role for managing High Availability Clustering.
   * Pacemaker cluster properties
   * stonith and resources
   * resource defaults and resource operation defaults
+  * stonith levels, also known as fencing topology
   * resource constraints

 ## Requirements
@@ -137,7 +138,9 @@ recommended to vault encrypt the value, see
 string, no default - optional

-Needed only if a `ha_cluster_quorum` is configured to use a qdevice of type `net` AND password of the `hacluster` user on the qdevice is different from `ha_cluster_hacluster_password`. This user has full access to a cluster. It is
+Needed only if `ha_cluster_quorum` is configured to use a qdevice of type
+`net` AND the password of the `hacluster` user on the qdevice differs from
+`ha_cluster_hacluster_password`. This user has full access to a cluster. It is
 recommended to vault encrypt the value, see
 for details.
@@ -212,33 +215,35 @@ certificate - key pair.

 #### `ha_cluster_pcsd_certificates`

-If there is no pcsd private key and certificate, there are two ways to create them.
+If there is no pcsd private key and certificate, there are two ways to create
+them.

-One way is by setting `ha_cluster_pcsd_certificates` variable.
-Another way is by setting none of
-[`ha_cluster_pcsd_public_key_src` and `ha_cluster_pcsd_private_key_src`](#ha_cluster_pcsd_public_key_src-ha_cluster_pcsd_private_key_src) and `ha_cluster_pcsd_certificates`.
+One way is by setting the `ha_cluster_pcsd_certificates` variable. Another way
+is by setting none of
+[`ha_cluster_pcsd_public_key_src` and `ha_cluster_pcsd_private_key_src`](#ha_cluster_pcsd_public_key_src-ha_cluster_pcsd_private_key_src)
+and `ha_cluster_pcsd_certificates`.

-If `ha_cluster_pcsd_certificates` is provided, the `certificate` role is internally
-used and it creates the private key and certificate for pcsd as defined.
-If none of the variables are provided, the `ha_cluster` role will create pcsd
-certificates via pcsd itself.
+If `ha_cluster_pcsd_certificates` is provided, the `certificate` role is used
+internally and creates the private key and certificate for pcsd as defined. If
+none of the variables are provided, the `ha_cluster` role will create pcsd
+certificates via pcsd itself.

-The value of `ha_cluster_pcsd_certificates` is set to the variable `certificate_requests`
-in the `certificate` role.
-For more information, see the `certificate_requests` section in the `certificate`
-role documentation.
+The value of `ha_cluster_pcsd_certificates` is set to the variable
+`certificate_requests` in the `certificate` role. For more information, see the
+`certificate_requests` section in the `certificate` role documentation.

 The default value is `[]`.

-NOTE: The `certificate` role, unless using IPA and joining the systems to an IPA domain,
-creates self-signed certificates, so you will need to explicitly configure trust,
-which is not currently supported by the system roles.
+NOTE: The `certificate` role, unless using IPA and joining the systems to an
+IPA domain, creates self-signed certificates, so you will need to explicitly
+configure trust, which is not currently supported by the system roles.

 NOTE: When you set `ha_cluster_pcsd_certificates`, you must not set
-`ha_cluster_pcsd_public_key_src` and `ha_cluster_pcsd_private_key_src` variables.
+the `ha_cluster_pcsd_public_key_src` and `ha_cluster_pcsd_private_key_src`
+variables.

-NOTE: When you set `ha_cluster_pcsd_certificates`, `ha_cluster_regenerate_keys` is
-ignored for this certificate - key pair.
+NOTE: When you set `ha_cluster_pcsd_certificates`, `ha_cluster_regenerate_keys`
+is ignored for this certificate - key pair.

 #### `ha_cluster_regenerate_keys`
@@ -749,6 +754,51 @@ The structure is the same as for
 rules are described in section `resource op defaults set create` of `pcs(8)`
 man page.

+#### `ha_cluster_stonith_levels`
+
+structure, default: no stonith levels
+
+```yaml
+ha_cluster_stonith_levels:
+  - level: 1..9
+    target: node_name
+    target_pattern: node_name_regular_expression
+    target_attribute: node_attribute_name
+    target_value: node_attribute_value
+    resource_ids:
+      - fence_device_1
+      - fence_device_2
+  - level: 1..9
+    target: node_name
+    target_pattern: node_name_regular_expression
+    target_attribute: node_attribute_name
+    target_value: node_attribute_value
+    resource_ids:
+      - fence_device_1
+      - fence_device_2
+```
+
+This variable defines stonith levels, also known as fencing topology. Stonith
+levels configure the cluster to use multiple devices to fence nodes. You may
+define alternative devices in case one fails, require multiple devices to all
+succeed for a node to be considered fenced, or even combine the two.
+
+The items are as follows:
+
+* `level` (mandatory) - Order in which to attempt the levels. Levels are
+  attempted in ascending order until one succeeds.
+* `target` (optional) - Name of a node this level applies to.
+* `target_pattern` (optional) - Regular expression (as defined in
+  [POSIX](https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_04))
+  matching names of nodes this level applies to.
+* `target_attribute` and `target_value` (optional) - Name and value of a node
+  attribute that is set for nodes this level applies to.
+* Exactly one of `target`, `target_pattern`, `target_attribute` must be
+  specified.
+* `resource_ids` (mandatory) - List of stonith resources that must all be tried
+  for this level.
+
 #### `ha_cluster_constraints_location`

 structure, default: no constraints
@@ -1558,6 +1608,57 @@ SBD stonith resource.
     - linux-system-roles.ha_cluster
 ```

+### Configuring stonith levels
+
+```yaml
+- hosts: node1 node2
+  vars:
+    ha_cluster_cluster_name: my-new-cluster
+    ha_cluster_hacluster_password: password
+    ha_cluster_resource_primitives:
+      - id: apc1
+        agent: 'stonith:fence_apc_snmp'
+        instance_attrs:
+          - attrs:
+              - name: ip
+                value: apc1.example.com
+              - name: username
+                value: user
+              - name: password
+                value: secret
+              - name: pcmk_host_map
+                value: node1:1;node2:2
+      - id: apc2
+        agent: 'stonith:fence_apc_snmp'
+        instance_attrs:
+          - attrs:
+              - name: ip
+                value: apc2.example.com
+              - name: username
+                value: user
+              - name: password
+                value: secret
+              - name: pcmk_host_map
+                value: node1:1;node2:2
+    # Nodes have redundant power supplies, apc1 and apc2. The cluster must
+    # ensure that when attempting to reboot a node, both power supplies are
+    # turned off before either power supply is turned back on.
+    ha_cluster_stonith_levels:
+      - level: 1
+        target: node1
+        resource_ids:
+          - apc1
+          - apc2
+      - level: 1
+        target: node2
+        resource_ids:
+          - apc1
+          - apc2
+
+  roles:
+    - linux-system-roles.ha_cluster
+```
+
 ### Creating a cluster with resource constraints

 ```yaml
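The README section above notes that levels are attempted in ascending order until one succeeds. A minimal sketch of the fallback case, with hypothetical fence resources `fence_primary` and `fence_backup`: if `fence_primary` fails to fence `node1`, the cluster falls back to `fence_backup`.

```yaml
ha_cluster_stonith_levels:
  # Level 1 is tried first: fence node1 via fence_primary.
  - level: 1
    target: node1
    resource_ids:
      - fence_primary
  # Level 2 is tried only if level 1 failed: fall back to fence_backup.
  - level: 2
    target: node1
    resource_ids:
      - fence_backup
```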
diff --git a/defaults/main.yml b/defaults/main.yml
index 77141769..e08b0cdb 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -53,6 +53,8 @@ ha_cluster_resource_groups: []
 ha_cluster_resource_clones: []
 ha_cluster_resource_bundles: []

+ha_cluster_stonith_levels: []
+
 ha_cluster_constraints_location: []
 ha_cluster_constraints_colocation: []
 ha_cluster_constraints_order: []
diff --git a/examples/stonith-levels.yml b/examples/stonith-levels.yml
new file mode 100644
index 00000000..8b47612a
--- /dev/null
+++ b/examples/stonith-levels.yml
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: MIT
+---
+- name: Example ha_cluster role invocation - stonith levels
+  hosts: all
+  vars:
+    ha_cluster_manage_firewall: true
+    ha_cluster_manage_selinux: true
+    ha_cluster_cluster_name: my-new-cluster
+    ha_cluster_hacluster_password: password
+    ha_cluster_resource_primitives:
+      - id: apc1
+        agent: 'stonith:fence_apc_snmp'
+        instance_attrs:
+          - attrs:
+              - name: ip
+                value: apc1.example.com
+              - name: username
+                value: user
+              - name: password
+                value: secret
+              - name: pcmk_host_map
+                value: node1:1;node2:2
+      - id: apc2
+        agent: 'stonith:fence_apc_snmp'
+        instance_attrs:
+          - attrs:
+              - name: ip
+                value: apc2.example.com
+              - name: username
+                value: user
+              - name: password
+                value: secret
+              - name: pcmk_host_map
+                value: node1:1;node2:2
+    # Nodes have redundant power supplies, apc1 and apc2. The cluster must
+    # ensure that when attempting to reboot a node, both power supplies are
+    # turned off before either power supply is turned back on.
+    ha_cluster_stonith_levels:
+      - level: 1
+        target: node1
+        resource_ids:
+          - apc1
+          - apc2
+      - level: 1
+        target: node2
+        resource_ids:
+          - apc1
+          - apc2
+
+  roles:
+    - linux-system-roles.ha_cluster
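After this example playbook converges, the topology can be inspected on a cluster node with `pcs stonith level config`. Based on the output format asserted by the tests further below, it would print roughly the following (labels vary slightly between pcs versions):

```
Target: node1
  Level 1 - apc1,apc2
Target: node2
  Level 1 - apc1,apc2
```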
diff --git a/tasks/shell_pcs/check-and-prepare-role-variables.yml b/tasks/shell_pcs/check-and-prepare-role-variables.yml
index aadf53db..09f77e7b 100644
--- a/tasks/shell_pcs/check-and-prepare-role-variables.yml
+++ b/tasks/shell_pcs/check-and-prepare-role-variables.yml
@@ -37,6 +37,28 @@
         - ha_cluster_cluster_present
         - ha_cluster_qnetd.present | d(false)

+    - name: Fail if no valid level is specified for a fencing level
+      fail:
+        msg: Specify 'level' 1..9 for each fencing level
+      when:
+        - not ((item.level | d() | int) > 0 and (item.level | d() | int) < 10)
+      loop: "{{ ha_cluster_stonith_levels }}"
+      run_once: true
+
+    - name: Fail if no target is specified for a fencing level
+      fail:
+        msg: >
+          Specify exactly one of 'target', 'target_pattern', 'target_attribute'
+          for each fencing level
+      when:
+        - >
+          [item.target is defined,
+          item.target_pattern is defined,
+          item.target_attribute is defined]
+          | select("true") | list | length != 1
+      loop: "{{ ha_cluster_stonith_levels }}"
+      run_once: true
+
     - name: Discover cluster node names
       set_fact:
         __ha_cluster_node_name: "{{ ha_cluster.node_name | d(inventory_hostname) }}"
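The target check above builds a list of three booleans and counts how many are true. A walk-through with a hypothetical item that defines two targets:

```yaml
# Invalid item: both target and target_pattern are defined.
ha_cluster_stonith_levels:
  - level: 1
    target: node1
    target_pattern: node-\d+
    resource_ids:
      - fence1
# The when expression then evaluates as
#   [true, true, false] | select("true") | list | length  ->  2
# and 2 != 1, so the task fails with the "exactly one" message.
```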
diff --git a/tasks/shell_pcs/create-and-push-cib.yml b/tasks/shell_pcs/create-and-push-cib.yml
index 5d98e3a9..4520f1ff 100644
--- a/tasks/shell_pcs/create-and-push-cib.yml
+++ b/tasks/shell_pcs/create-and-push-cib.yml
@@ -153,8 +153,15 @@
         resource_clone: "{{ item }}"
     loop: "{{ ha_cluster_resource_clones }}"

-  ## Constraints
+  ## Stonith levels
+  - name: Configure stonith levels
+    include_tasks: pcs-cib-stonith-level.yml
+    loop: "{{ ha_cluster_stonith_levels }}"
+    loop_control:
+      index_var: stonith_level_index
+      loop_var: stonith_level

+  ## Constraints
   - name: Configure resource location constraints
     include_tasks: pcs-cib-constraint-location.yml
     loop: "{{ ha_cluster_constraints_location }}"
diff --git a/tasks/shell_pcs/pcs-cib-stonith-level.yml b/tasks/shell_pcs/pcs-cib-stonith-level.yml
new file mode 100644
index 00000000..3bec6931
--- /dev/null
+++ b/tasks/shell_pcs/pcs-cib-stonith-level.yml
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: MIT
+---
+- name: Configure stonith level {{ stonith_level_index }}
+  command:
+    cmd: >
+      pcs -f {{ __ha_cluster_tempfile_cib_xml.path | quote }}
+      -- stonith level add
+      {{ stonith_level.level | quote }}
+
+      {% if stonith_level.target | d() %}
+      {{ stonith_level.target | quote }}
+      {% elif stonith_level.target_pattern | d() %}
+      regexp%{{ stonith_level.target_pattern | quote }}
+      {% else %}
+      attrib%{{ stonith_level.target_attribute | quote }}={{
+      stonith_level.target_value | d() | quote }}
+      {% endif %}
+
+      {% for resource_id in stonith_level.resource_ids %}
+      {{ resource_id | quote }}
+      {% endfor %}
+  # We always need to create the CIB to see whether it's the same as what is
+  # already present in the cluster. However, we don't want to report it as a
+  # change since the only thing which matters is pushing the resulting CIB to
+  # the cluster.
+  check_mode: false
+  changed_when: not ansible_check_mode
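To make the command template concrete, here is a hypothetical `stonith_level` loop item and the command it renders to (the temporary CIB path is illustrative):

```yaml
# Hypothetical loop item:
stonith_level:
  level: 2
  target_pattern: node-\d+
  resource_ids:
    - fence2
# The cmd template above renders this, approximately, to:
#   pcs -f /tmp/ha_cluster_cib.xml -- stonith level add 2 regexp%'node-\d+' fence2
```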
diff --git a/tests/tests_cib_stonith_levels.yml b/tests/tests_cib_stonith_levels.yml
new file mode 100644
index 00000000..7251b2ed
--- /dev/null
+++ b/tests/tests_cib_stonith_levels.yml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: MIT
+---
+- name: Configure stonith levels
+  hosts: all
+  vars_files: vars/main.yml
+
+  tasks:
+    - name: Run test
+      tags: tests::verify
+      block:
+        - name: Set up test environment
+          include_role:
+            name: linux-system-roles.ha_cluster
+            tasks_from: test_setup.yml
+
+        - name: Find first node name
+          set_fact:
+            __test_first_node: "{{
+              (ansible_play_hosts_all | length == 1)
+              | ternary('localhost', ansible_play_hosts[0]) }}"
+
+        - name: Run HA Cluster role
+          include_role:
+            name: linux-system-roles.ha_cluster
+            public: true
+          vars:
+            ha_cluster_cluster_name: test-cluster
+            ha_cluster_manage_firewall: true
+            ha_cluster_manage_selinux: true
+            ha_cluster_resource_primitives:
+              - id: fence1
+                agent: 'stonith:fence_kdump'
+              - id: fence2
+                agent: 'stonith:fence_kdump'
+            ha_cluster_stonith_levels:
+              - level: 1
+                target: "{{ __test_first_node }}"
+                resource_ids:
+                  - fence1
+              - level: 2
+                target_pattern: node-\d+
+                resource_ids:
+                  - fence2
+              - level: 3
+                target_attribute: some-name
+                resource_ids:
+                  - fence1
+                  - fence2
+              - level: 4
+                target_attribute: some-name
+                target_value: some-value
+                resource_ids:
+                  - fence2
+                  - fence1
+
+        - name: Fetch versions of cluster components
+          include_tasks: tasks/fetch_versions.yml
+
+        - name: Verify stonith levels
+          vars:
+            __test_regexp_label: "{{
+              __test_pcs_version is version('0.10.10', '>')
+              | ternary(' (regexp)', '') }}"
+            __test_expected_lines:
+              - "Target: {{ __test_first_node }}"
+              - "  Level 1 - fence1"
+              - "Target{{ __test_regexp_label }}: node-\\d+"
+              - "  Level 2 - fence2"
+              - "Target: some-name="
+              - "  Level 3 - fence1,fence2"
+              - "Target: some-name=some-value"
+              - "  Level 4 - fence2,fence1"
+          block:
+            - name: Fetch stonith levels configuration from the cluster
+              command:
+                cmd: pcs stonith level config
+              register: __test_pcs_stonith_level_config
+              changed_when: false
+
+            - name: Print real stonith levels configuration
+              debug:
+                var: __test_pcs_stonith_level_config
+
+            - name: Print expected stonith levels configuration
+              debug:
+                var: __test_expected_lines | list
+
+            - name: Check stonith levels configuration
+              assert:
+                that:
+                  - __test_pcs_stonith_level_config.stdout_lines
+                    == __test_expected_lines | list
+
+        - name: Check firewall and selinux state
+          include_tasks: tasks/check_firewall_selinux.yml
diff --git a/tests/tests_cib_stonith_levels_validation.yml b/tests/tests_cib_stonith_levels_validation.yml
new file mode 100644
index 00000000..11fad685
--- /dev/null
+++ b/tests/tests_cib_stonith_levels_validation.yml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: MIT
+---
+- name: Ensure stonith levels are properly defined
+  hosts: all
+  vars_files: vars/main.yml
+
+  tasks:
+    - name: Run test
+      tags: tests::verify
+      block:
+        - name: Set up test environment
+          include_role:
+            name: linux-system-roles.ha_cluster
+            tasks_from: test_setup.yml
+
+        - name: Check that target is specified
+          block:
+            - name: Run HA Cluster role 1
+              include_role:
+                name: linux-system-roles.ha_cluster
+              vars:
+                ha_cluster_cluster_name: test-cluster
+                ha_cluster_stonith_levels:
+                  - level: 1
+                    resource_ids:
+                      - fence1
+          rescue:
+            - name: Check errors 1
+              assert:
+                that:
+                  - ansible_failed_result.results[0].msg ==
+                    "Specify exactly one of 'target', 'target_pattern', "
+                    ~ "'target_attribute' for each fencing level\n"
+              run_once: true  # noqa: run_once[task]
+
+        - name: Check that exactly one target is specified
+          block:
+            - name: Run HA Cluster role 2
+              include_role:
+                name: linux-system-roles.ha_cluster
+              vars:
+                ha_cluster_cluster_name: test-cluster
+                ha_cluster_stonith_levels:
+                  - level: 1
+                    target: node
+                    target_pattern: node
+                    resource_ids:
+                      - fence1
+          rescue:
+            - name: Check errors 2
+              assert:
+                that:
+                  - ansible_failed_result.results[0].msg ==
+                    "Specify exactly one of 'target', 'target_pattern', "
+                    ~ "'target_attribute' for each fencing level\n"
+              run_once: true  # noqa: run_once[task]
+
+        - name: Check that level is specified
+          block:
+            - name: Run HA Cluster role 3
+              include_role:
+                name: linux-system-roles.ha_cluster
+              vars:
+                ha_cluster_cluster_name: test-cluster
+                ha_cluster_stonith_levels:
+                  - target: node
+                    resource_ids:
+                      - fence1
+          rescue:
+            - name: Check errors 3
+              assert:
+                that:
+                  - ansible_failed_result.results[0].msg ==
+                    "Specify 'level' 1..9 for each fencing level"
+              run_once: true  # noqa: run_once[task]
+
+        - name: Check that level is correct
+          block:
+            - name: Run HA Cluster role 4
+              include_role:
+                name: linux-system-roles.ha_cluster
+              vars:
+                ha_cluster_cluster_name: test-cluster
+                ha_cluster_stonith_levels:
+                  - level: 10
+                    target: node
+                    resource_ids:
+                      - fence1
+          rescue:
+            - name: Check errors 4
+              assert:
+                that:
+                  - ansible_failed_result.results[0].msg ==
+                    "Specify 'level' 1..9 for each fencing level"
+              run_once: true  # noqa: run_once[task]
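Both new test files are ordinary playbooks, so they can also be run locally; a sketch assuming a disposable `test-inventory` (the tests reconfigure the target nodes, so never aim them at production):

```yaml
# Sketch only; test nodes get a cluster set up and torn down on them.
#   ansible-playbook -i test-inventory tests/tests_cib_stonith_levels.yml
#   ansible-playbook -i test-inventory tests/tests_cib_stonith_levels_validation.yml
```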