-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add preflight OS and NIC and other checks
- Implemented OS preflight checks to validate system requirements before Ceph cluster creation.
- Checks include:
  - OS version (RHEL 9+ required)
  - SELinux enforcing mode
  - Firewalld installation and status
  - Required package availability (rpcbind, podman, firewalld)
  - Podman version check (>= 3.3)
  - RHEL software profile validation
  - Tuned profile check
  - CPU, RAM, Swap, and Filesystem (part of other checks)
  - Check whether jumbo frames are enabled
  - Check whether the NIC is configured with DHCP or a static IP
  - Check whether the bandwidth is sufficient
  - Collect and output the current NIC options set (e.g. bonding, not bridged or virtual)
  - Check and report network latency (ping) with all hosts provided in the inventory file
  - Separate NICs for front-end and back-end networks
- Loading branch information
1 parent
1d3efbc
commit 39a250e
Showing
4 changed files
with
340 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,4 +22,6 @@ infra_pkgs: | |
- podman | ||
- lvm2 | ||
- sos | ||
- rpcbind | ||
- firewalld | ||
client_group: clients |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,314 @@ | ||
--- | ||
# Both accumulators start empty; every later check appends one or more rows
# to preflight_results and, on failure, a label to preflight_failures.
- name: Initialize preflight results list
  ansible.builtin.set_fact:
    preflight_failures: []
    preflight_results: []

# Full fact collection: the checks below read ansible_facts for distribution,
# memory, mounts and network data.
- name: Gather all Ansible facts
  ansible.builtin.setup: {}
|
||
# OS gate: PASS only for distribution 'RedHat' with major version >= 9.
# The reason is now derived from the same verdict as the result, so a
# non-RHEL host running version 9+ (which fails the check) no longer
# reports 'N/A' as its reason.
- name: Check if OS is RHEL 9+
  vars:
    os_verdict: >-
      {{ 'PASS'
         if ansible_facts['distribution'] == 'RedHat'
            and ansible_facts['distribution_major_version'] | int >= 9
         else 'FAIL' }}
  ansible.builtin.set_fact:
    os_check: "{{ os_verdict }}"
    os_reason: >-
      {{ ('Ceph requires RHEL 9+. Detected: ' ~ ansible_facts['distribution'] ~ ' ' ~ ansible_facts['distribution_version'])
         if os_verdict == 'FAIL' else 'N/A' }}

# Append the OS row to the report and flag it as critical when it failed.
- name: Store OS check result
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [{'Check': 'OS Version', 'Result': os_check, 'Reason': os_reason}] }}"
    preflight_failures: "{{ preflight_failures + ['OS Version'] if os_check == 'FAIL' else preflight_failures }}"
|
||
# Try to bring SELinux to enforcing/targeted. A module failure is tolerated
# (ignore_errors) so it ends up in the report like every other check instead
# of aborting the play. The change status is no longer forced to "ok": this
# task can modify the host and should say so.
- name: Ensure SELinux is set to Enforcing mode
  ansible.posix.selinux:
    policy: targeted
    state: enforcing
  register: selinux_status
  ignore_errors: true

# Re-read the SELinux facts so the verdict reflects the state after the
# enforcement attempt above.
- name: Retrieve SELinux status from ansible_facts
  ansible.builtin.setup:
    gather_subset:
      - selinux

# Verdict and reason are computed once (task-level vars) and then published
# together with the report row. When the module signalled that a reboot is
# needed, the reason says so instead of claiming enforcement was impossible.
- name: Store SELinux check result
  vars:
    selinux_verdict: >-
      {{ 'PASS'
         if ansible_facts['selinux']['status'] == 'enabled'
            and ansible_facts['selinux']['mode'] == 'enforcing'
         else 'FAIL' }}
    selinux_explanation: >-
      {{ 'N/A' if selinux_verdict == 'PASS'
         else ('SELinux was set to enforcing in its configuration, but a reboot is required before it takes effect'
               if selinux_status.reboot_required | default(false)
               else 'SELinux was not in enforcing mode and could not be enforced automatically') }}
  ansible.builtin.set_fact:
    selinux_check: "{{ selinux_verdict }}"
    selinux_reason: "{{ selinux_explanation }}"
    preflight_results: "{{ preflight_results + [{'Check': 'SELinux', 'Result': selinux_verdict, 'Reason': selinux_explanation}] }}"
    preflight_failures: "{{ preflight_failures + ['SELinux'] if selinux_verdict == 'FAIL' else preflight_failures }}"
|
||
# Install everything listed in infra_pkgs. ignore_errors (rather than
# failed_when: false) is used on purpose: failed_when overwrites the
# registered `failed` flag with its own result, which made the check below
# report PASS even when the installation failed.
- name: Ensure required packages are installed
  ansible.builtin.package:
    name: "{{ infra_pkgs }}"
    state: present
  register: package_install
  ignore_errors: true

# PASS when the package task succeeded, FAIL otherwise.
- name: Determine Package Installation Check Result
  ansible.builtin.set_fact:
    package_check: "{{ 'FAIL' if package_install is failed else 'PASS' }}"

- name: Determine Package Installation Failure Reason
  ansible.builtin.set_fact:
    package_reason: "{{ 'Some required packages failed to install' if package_check == 'FAIL' else 'N/A' }}"

# Append the row to the report and flag it as critical when it failed.
- name: Store Package Installation Result
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [{'Check': 'Required Packages Installed', 'Result': package_check, 'Reason': package_reason}] }}"
    preflight_failures: "{{ preflight_failures + ['Required Packages'] if package_check == 'FAIL' else preflight_failures }}"
|
||
# Start and enable firewalld. ignore_errors keeps the real failure state in
# the registered result; with failed_when: false the `failed` flag was always
# false, so the failure reason could never be reported.
- name: Ensure firewalld is enabled and running
  ansible.builtin.systemd:
    name: firewalld
    state: started
    enabled: true
  register: firewall_status
  ignore_errors: true

# The verdict is based on whether the task succeeded: `state: started` either
# leaves the unit running or fails. The previous test on
# firewall_status.status.ActiveState raised an undefined-variable error when
# the module failed without returning `status`.
# NOTE(review): `status` also appears to be captured before the unit is
# started, which would report a freshly started service as inactive — confirm.
- name: Store Firewalld check result
  vars:
    firewalld_verdict: "{{ 'FAIL' if firewall_status is failed else 'PASS' }}"
    firewalld_explanation: "{{ 'Firewalld was not running and could not be started' if firewall_status is failed else 'N/A' }}"
  ansible.builtin.set_fact:
    firewalld_check: "{{ firewalld_verdict }}"
    firewalld_reason: "{{ firewalld_explanation }}"
    preflight_results: "{{ preflight_results + [{'Check': 'Firewalld Running', 'Result': firewalld_verdict, 'Reason': firewalld_explanation}] }}"
    preflight_failures: "{{ preflight_failures + ['Firewalld Running'] if firewalld_verdict == 'FAIL' else preflight_failures }}"
|
||
# Remediation runs first so the verdict below describes the host after the
# fix attempt; previously a missing Podman was recorded as a critical failure
# and only installed afterwards. A failed install is tolerated here and
# surfaces through the checks that follow.
- name: Ensure Podman is installed if missing (Fixable)
  ansible.builtin.package:
    name: podman
    state: present
  ignore_errors: true

# Populates ansible_facts.packages, which the Podman checks read.
- name: Collect installed package facts
  ansible.builtin.package_facts:
    manager: auto

- name: Check if Podman is installed
  ansible.builtin.set_fact:
    podman_installed: "{{ 'podman' in ansible_facts.packages }}"

# 'NOT_INSTALLED' is a sentinel; it is never passed to the version comparison
# because every comparison below is guarded by podman_installed.
- name: Extract Podman version
  ansible.builtin.set_fact:
    podman_version: "{{ ansible_facts.packages['podman'][0].version if podman_installed else 'NOT_INSTALLED' }}"

# Two verdicts: presence, and minimum version. The minimum defaults to 3.3
# and can be overridden by defining podman_min_version.
- name: Define Podman Check Variables
  vars:
    podman_required_version: "{{ podman_min_version | default('3.3') }}"
  ansible.builtin.set_fact:
    podman_check: "{{ 'PASS' if podman_installed else 'FAIL' }}"
    podman_reason: "{{ 'Podman is not installed, required for Ceph' if not podman_installed else 'Podman version is ' ~ podman_version }}"
    podman_version_check: "{{ 'PASS' if (podman_installed and podman_version is version(podman_required_version, '>=')) else 'FAIL' }}"
    podman_version_reason: >-
      {{ 'N/A'
         if (podman_installed and podman_version is version(podman_required_version, '>='))
         else (('Podman ' ~ podman_version ~ ' is older than the required ' ~ podman_required_version)
               if podman_installed
               else 'Podman is not installed, so its version cannot be checked') }}

# Append both rows to the report; a missing or outdated Podman is critical.
- name: Store Podman Installation Check
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [
      {'Check': 'Podman Installed', 'Result': podman_check, 'Reason': podman_reason},
      {'Check': 'Podman Version', 'Result': podman_version_check, 'Reason': podman_version_reason}
      ] }}"
    preflight_failures: "{{ preflight_failures +
      (['Podman Installed'] if podman_check == 'FAIL' else []) +
      (['Podman Version'] if (podman_check == 'PASS' and podman_version_check == 'FAIL') else []) }}"
|
||
# Read-only query; a non-zero exit is tolerated and simply yields a FAIL
# verdict below because the expected strings are then absent from stdout.
# NOTE(review): this lists consumed subscriptions — confirm that the
# 'File and Storage Server' profile name actually shows up in that output.
- name: Validate RHEL software profile
  ansible.builtin.command:
    cmd: subscription-manager list --consumed
  register: rhel_profile
  failed_when: false
  changed_when: false

# FAIL unless both marker strings occur in the command output.
- name: Define RHEL Profile Check Result
  ansible.builtin.set_fact:
    rhel_profile_check: "{{ 'FAIL' if ('Server' not in rhel_profile.stdout or 'File and Storage Server' not in rhel_profile.stdout) else 'PASS' }}"

- name: Define RHEL Profile Check Reason
  ansible.builtin.set_fact:
    rhel_profile_reason: "{{ 'N/A' if rhel_profile_check != 'FAIL' else 'Incorrect RHEL software profile. Expected: Server with File and Storage Server.' }}"

# Append the row to the report and flag it as critical when it failed.
- name: Store RHEL Profile check
  ansible.builtin.set_fact:
    preflight_failures: "{{ preflight_failures if rhel_profile_check != 'FAIL' else preflight_failures + ['RHEL Profile'] }}"
    preflight_results: "{{ preflight_results + [{'Check': 'RHEL Profile', 'Result': rhel_profile_check, 'Reason': rhel_profile_reason}] }}"
|
||
# Read-only query of the active tuned profile; errors are tolerated and lead
# to a FAIL verdict because the expected profile name is then not in stdout.
- name: Get current tuned profile
  ansible.builtin.command:
    cmd: tuned-adm active
  register: tuned_profile
  failed_when: false
  changed_when: false

# PASS only when 'throughput-performance' appears in the command output.
- name: Define Tuned Profile Check Result
  ansible.builtin.set_fact:
    tuned_profile_check: "{{ 'FAIL' if 'throughput-performance' not in tuned_profile.stdout else 'PASS' }}"

- name: Define Tuned Profile Check Reason
  ansible.builtin.set_fact:
    tuned_profile_reason: "{{ 'N/A' if tuned_profile_check != 'FAIL' else 'Incorrect tuned profile. Expected: throughput-performance' }}"

# Append the row to the report and flag it as critical when it failed.
- name: Store Tuned Profile Check
  ansible.builtin.set_fact:
    preflight_failures: "{{ preflight_failures if tuned_profile_check != 'FAIL' else preflight_failures + ['Tuned Profile'] }}"
    preflight_results: "{{ preflight_results + [{'Check': 'Tuned Profile', 'Result': tuned_profile_check, 'Reason': tuned_profile_reason}] }}"
|
||
# RHEL 9 on x86_64 requires the x86-64-v2 microarchitecture level. AVX2 is an
# x86-64-v3 feature and is not required, so grepping for it rejected valid
# v2-only hosts. The script prints "yes" only when every v2 feature flag is
# listed in /proc/cpuinfo, otherwise "no"; the registered variable keeps its
# original name and yes/no contract.
- name: Check CPU requirements
  ansible.builtin.shell: |
    flags=" $(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2) "
    for flag in cx16 lahf_lm popcnt pni sse4_1 sse4_2 ssse3; do
      case "$flags" in
        *" $flag "*) ;;
        *) echo no; exit 0 ;;
      esac
    done
    echo yes
  register: cpu_supports_x86_64_v2
  changed_when: false
  failed_when: false

# The microarchitecture-level test only applies to x86_64; other
# architectures pass it unconditionally. The core-count rule is unchanged.
- name: Define CPU Check Variables
  vars:
    cpu_level_verdict: >-
      {{ 'PASS'
         if ansible_facts['architecture'] != 'x86_64'
            or cpu_supports_x86_64_v2.stdout | default('') | trim == 'yes'
         else 'FAIL' }}
  ansible.builtin.set_fact:
    cpu_checks:
      x86_64_v2:
        result: "{{ cpu_level_verdict }}"
        reason: "{{ 'N/A' if cpu_level_verdict == 'PASS' else 'CPU lacks one or more x86-64-v2 features (cx16, lahf_lm, popcnt, pni, sse4_1, sse4_2, ssse3). RHEL 9 requires x86-64-v2 support.' }}"
      cores:
        result: "{{ 'PASS' if ansible_facts['processor_vcpus'] | int >= 4 else 'FAIL' }}"
        reason: "{{ 'System has only ' ~ ansible_facts['processor_vcpus'] ~ ' cores, required: 4' if ansible_facts['processor_vcpus'] | int < 4 else 'N/A' }}"

# Append both CPU rows to the report; either failure is critical.
- name: Store CPU Checks
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [
      {'Check': 'CPU x86-64-v2', 'Result': cpu_checks['x86_64_v2']['result'], 'Reason': cpu_checks['x86_64_v2']['reason']},
      {'Check': 'CPU Cores >= 4', 'Result': cpu_checks['cores']['result'], 'Reason': cpu_checks['cores']['reason']}
      ] }}"
    preflight_failures: "{{ preflight_failures +
      (['CPU x86-64-v2'] if cpu_checks['x86_64_v2']['result'] == 'FAIL' else []) +
      (['CPU Cores'] if cpu_checks['cores']['result'] == 'FAIL' else []) }}"
|
||
# RAM must be at least 8192 MB; swap must be at least 1.5 x RAM (rounded).
# The swap threshold is computed once as a task-level variable and reused.
- name: Define RAM and Swap Check Variables
  vars:
    swap_needed_mb: "{{ ((ansible_facts['memtotal_mb'] | int * 1.5) | round) | int }}"
  ansible.builtin.set_fact:
    memory_checks:
      ram:
        result: "{{ 'FAIL' if ansible_facts['memtotal_mb'] | int < 8192 else 'PASS' }}"
        reason: "{{ 'N/A' if ansible_facts['memtotal_mb'] | int >= 8192 else ('System has only ' ~ ansible_facts['memtotal_mb'] ~ ' MB RAM, required: 8192MB') }}"
      swap:
        required: "{{ swap_needed_mb | int }}"
        actual: "{{ ansible_facts['swaptotal_mb'] | int }}"
        result: "{{ 'FAIL' if (ansible_facts['swaptotal_mb'] | int) < (swap_needed_mb | int) else 'PASS' }}"
        reason: "{{ 'N/A' if (ansible_facts['swaptotal_mb'] | int) >= (swap_needed_mb | int) else ('System has only ' ~ ansible_facts['swaptotal_mb'] ~ ' MB Swap, required: ' ~ (swap_needed_mb | int) ~ ' MB') }}"

# Append both memory rows to the report; either failure is critical.
- name: Store RAM and Swap Space Check Results
  ansible.builtin.set_fact:
    preflight_failures: "{{ preflight_failures +
      ([] if memory_checks['ram']['result'] != 'FAIL' else ['Minimum RAM']) +
      ([] if memory_checks['swap']['result'] != 'FAIL' else ['Swap Space']) }}"
    preflight_results: "{{ preflight_results + [
      {'Check': 'Minimum RAM (8GB)', 'Result': memory_checks['ram']['result'], 'Reason': memory_checks['ram']['reason']},
      {'Check': 'Swap Space (1.5x RAM)', 'Result': memory_checks['swap']['result'], 'Reason': memory_checks['swap']['reason']}
      ] }}"
|
||
# /var must be its own mount point and the root filesystem must be at least
# 100 (size_total floor-divided by 1024**3). Both intermediate values are
# computed once as task-level variables instead of being repeated per key.
- name: Define /var Partition and Root Filesystem Check Variables
  vars:
    var_mount_count: "{{ ansible_facts['mounts'] | selectattr('mount', 'equalto', '/var') | list | length }}"
    root_size_gb: "{{ ansible_facts['mounts'] | selectattr('mount', 'equalto', '/') | map(attribute='size_total') | first | default(0) | int // 1024**3 }}"
  ansible.builtin.set_fact:
    filesystem_checks:
      var_partition:
        exists: "{{ var_mount_count | int > 0 }}"
        result: "{{ 'FAIL' if var_mount_count | int == 0 else 'PASS' }}"
        reason: "{{ '/var is not a separate partition' if var_mount_count | int == 0 else 'N/A' }}"
      root_fs:
        size_gb: "{{ root_size_gb | int }}"
        result: "{{ 'FAIL' if root_size_gb | int < 100 else 'PASS' }}"
        reason: "{{ 'N/A' if root_size_gb | int >= 100 else ('Root FS is only ' ~ (root_size_gb | int) ~ 'GB, required: 100GB') }}"

# Append both filesystem rows to the report; either failure is critical.
- name: Store Filesystem Checks
  ansible.builtin.set_fact:
    preflight_failures: "{{ preflight_failures +
      ([] if filesystem_checks['var_partition']['result'] != 'FAIL' else ['/var Partition']) +
      ([] if filesystem_checks['root_fs']['result'] != 'FAIL' else ['Root Filesystem']) }}"
    preflight_results: "{{ preflight_results + [
      {'Check': '/var is a separate partition', 'Result': filesystem_checks['var_partition']['result'], 'Reason': filesystem_checks['var_partition']['reason']},
      {'Check': 'Root Filesystem >= 100GB', 'Result': filesystem_checks['root_fs']['result'], 'Reason': filesystem_checks['root_fs']['reason']}
      ] }}"
|
||
# Ask the kernel how the primary NIC's IPv4 address was assigned. Addresses
# with a lease lifetime (DHCP) are listed with the "dynamic" flag; statically
# configured ones are not. Read-only; errors are tolerated.
- name: Read IPv4 address flags of the primary NIC
  ansible.builtin.command:
    argv:
      - ip
      - "-4"
      - "-o"
      - addr
      - show
      - dev
      - "{{ ansible_facts['default_ipv4']['interface'] }}"
  register: primary_nic_addr
  changed_when: false
  failed_when: false

# Facts about the interface that carries the default IPv4 route.
# - The per-interface fact key has '-' replaced by '_', so the device name is
#   normalised before the lookup; a missing entry falls back to the defaults.
# - primary_dhcp was previously derived from "a gateway exists", which is
#   also true for static configurations and made the static-IP check fail on
#   nearly every routed host. It now comes from the address flags read above.
- name: Extract networking facts
  vars:
    nic_fact_key: "{{ ansible_facts['default_ipv4']['interface'] | replace('-', '_') }}"
  ansible.builtin.set_fact:
    primary_nic: "{{ ansible_facts['default_ipv4']['interface'] }}"
    primary_ip: "{{ ansible_facts['default_ipv4']['address'] }}"
    primary_mac: "{{ ansible_facts['default_ipv4']['macaddress'] }}"
    primary_mtu: "{{ (ansible_facts[nic_fact_key] | default({}))['mtu'] | default('0') | int }}"
    primary_speed: "{{ (ansible_facts[nic_fact_key] | default({}))['speed'] | default('-1') | int }}"
    primary_dhcp: "{{ 'dhcp' if ' dynamic ' in (primary_nic_addr.stdout | default('')) else 'manual' }}"

# Advisory check: MTU above 1500 counts as jumbo frames.
- name: Define Jumbo Frames Check
  ansible.builtin.set_fact:
    jumbo_frames_check: "{{ 'PASS' if (primary_mtu | int) > 1500 else 'FAIL' }}"
    jumbo_frames_reason: "{{ 'MTU is ' ~ (primary_mtu | int) ~ ', recommended > 1500' if (primary_mtu | int) <= 1500 else 'N/A' }}"

- name: Store Jumbo Frames Check
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [{'Check': 'Jumbo Frames Enabled', 'Result': jumbo_frames_check, 'Reason': jumbo_frames_reason}] }}"

# Advisory check: a static address is recommended over DHCP.
- name: Define NIC Configuration Check
  ansible.builtin.set_fact:
    nic_config_check: "{{ 'PASS' if primary_dhcp == 'manual' else 'FAIL' }}"
    nic_config_reason: "{{ 'NIC is using DHCP, static IP is recommended' if primary_dhcp != 'manual' else 'N/A' }}"

- name: Store NIC Configuration Check
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [{'Check': 'NIC Static IP Configuration', 'Result': nic_config_check, 'Reason': nic_config_reason}] }}"

# Advisory check: link speed of at least 10000 Mbps. A negative speed means
# no usable value was reported, which is stated explicitly instead of
# printing "-1 Mbps".
- name: Define NIC Bandwidth Check
  ansible.builtin.set_fact:
    nic_speed_check: "{{ 'PASS' if (primary_speed | int) >= 10000 else 'FAIL' }}"
    nic_speed_reason: >-
      {{ 'N/A' if (primary_speed | int) >= 10000
         else ('NIC speed could not be determined, recommended is 10GbE'
               if (primary_speed | int) < 0
               else 'NIC speed is ' ~ primary_speed ~ ' Mbps, recommended is 10GbE') }}

- name: Store NIC Bandwidth Check
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [{'Check': 'NIC Bandwidth (10GbE Recommended)', 'Result': nic_speed_check, 'Reason': nic_speed_reason}] }}"
|
||
# Informational row: the names of all interfaces reported in the facts.
- name: Extract NIC Details
  ansible.builtin.set_fact:
    nic_config_details: "{{ ansible_facts['interfaces'] }}"

- name: Store NIC Configuration Info
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [{'Check': 'NIC Configuration', 'Result': 'INFO', 'Reason': 'NIC options: ' ~ (nic_config_details | join(', '))}] }}"

# Front-end NIC = interface of the default IPv4 route. Back-end NIC = first
# other interface that is not 'lo'; when there is none it falls back to the
# front-end NIC, which makes the separation check fail.
# NOTE(review): the candidate list is every interface name in the facts, so a
# bridge or other virtual device may be picked as the back-end NIC — confirm
# this is acceptable.
- name: Identify Front-End and Back-End NICs
  ansible.builtin.set_fact:
    backend_nic: "{{ ansible_facts['interfaces'] | difference(['lo', ansible_facts['default_ipv4']['interface']]) | first | default(ansible_facts['default_ipv4']['interface']) }}"
    frontend_nic: "{{ ansible_facts['default_ipv4']['interface'] | default('Unknown') }}"

# Sharing one NIC for both networks is recorded as a critical failure.
- name: Define NIC Separation Check
  ansible.builtin.set_fact:
    nic_separation_check: "{{ 'FAIL' if frontend_nic == backend_nic else 'PASS' }}"
    nic_separation_reason: "{{ 'N/A' if frontend_nic != backend_nic else 'Using same NIC for both front-end and back-end networks. Customers with large deployments should separate traffic for performance optimization.' }}"
    preflight_failures: "{{ preflight_failures if frontend_nic != backend_nic else preflight_failures + ['NIC Separation'] }}"

- name: Store NIC Separation Check
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [{'Check': 'Separate NICs for Frontend & Backend Networks', 'Result': nic_separation_check, 'Reason': nic_separation_reason}] }}"
|
||
# Measure ICMP round-trip time from this host to every inventory host.
# ansible.builtin.ping only verifies that Ansible can reach a host and returns
# the literal "pong", so it carried no latency data; an unreachable delegate
# also left `ping` undefined and broke the summary. Three quiet probes with a
# 2-second reply timeout are sent to each target; failures are tolerated and
# reported as "no reply".
- name: Ping all hosts in inventory
  ansible.builtin.command:
    argv:
      - ping
      - "-c"
      - "3"
      - "-q"
      - "-W"
      - "2"
      - "{{ hostvars[item]['ansible_host'] | default(item) }}"
  register: ping_results
  changed_when: false
  failed_when: false
  loop: "{{ groups['all'] }}"

# Informational row: "<host>: rtt min/avg/max/mdev = ... ms" per target, taken
# from ping's summary line, or "<host>: no reply" when that line is missing.
- name: Store Network Latency Check
  vars:
    latency_targets: "{{ ping_results.results | map(attribute='item') | list }}"
    latency_values: >-
      {{ ping_results.results
         | map(attribute='stdout', default='')
         | map('regex_search', 'rtt .* ms')
         | map('default', 'no reply', true)
         | list }}
  ansible.builtin.set_fact:
    preflight_results: "{{ preflight_results + [{'Check': 'Network Latency', 'Result': 'INFO', 'Reason': 'Latency results: ' ~ (latency_targets | zip(latency_values) | map('join', ': ') | join(', '))}] }}"
|
||
# Render one report per checked host on the controller. The previous
# run_once variant rendered only the first host's results into a single file.
- name: Generate Preflight Check Report
  ansible.builtin.template:
    src: templates/preflight_report.j2
    dest: "./ceph_preflight_report_{{ inventory_hostname }}.txt"
  delegate_to: localhost
  become: false

# The report lives on the controller, so it must be read there as well;
# without delegation slurp looked for the file on the remote host and failed.
- name: Read Preflight Check Report
  ansible.builtin.slurp:
    src: "./ceph_preflight_report_{{ inventory_hostname }}.txt"
  register: report_content
  delegate_to: localhost
  become: false

# Print the report line by line (carriage returns stripped).
- name: Show Report Summary
  ansible.builtin.debug:
    msg: "{{ report_content['content'] | b64decode | regex_replace('\\r', '') | split('\n') }}"

# Stop the play for this host when any critical check was recorded.
- name: Final Check - Fail if any critical checks failed
  ansible.builtin.fail:
    msg: "Preflight checks failed for the following: {{ preflight_failures | join(', ') }}. Please resolve these issues before proceeding."
  when: preflight_failures | length > 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{# Preflight report: one entry per row of preflight_results, followed by a summary of preflight_failures. #}
{# The status label is computed with `set` and printed via an expression so the line break after it survives trim_blocks. #}
{# Rows with Result 'INFO' are informational and are no longer rendered as failures. #}
Preflight Check Report

==================================================
System Checks
--------------------------------------------------
{% for item in preflight_results %}
{% set status = '✅ Passed' if item['Result'] == 'PASS' else ('ℹ️ Info' if item['Result'] == 'INFO' else '❌ Failed') %}
- **{{ item['Check'] }}**: {{ status }}
  - **Reason:** {{ item['Reason'] }}
{% endfor %}

==================================================
Summary
--------------------------------------------------
{% if preflight_failures | length > 0 %}
❌ **Critical Failures Detected**:
- {{ preflight_failures | join(', ') }}
- **Action Required**: Please fix the above issues before proceeding.
{% else %}
✅ **All Critical Checks Passed! You are good to go.**
{% endif %}