Skip to content

Commit

Permalink
Pretty print
Browse files Browse the repository at this point in the history
Make all URLs printed by dracut-metal-mdsquash contain a commit hash.

Remove the verbose `mount` and `umount` for prettier output.

Update the `README.adoc` file with a better/verbose explanation of the
wipe process.
  • Loading branch information
rustydb committed Feb 20, 2023
1 parent ba0b119 commit 532fb4f
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 64 deletions.
20 changes: 18 additions & 2 deletions 90metalmdsquash/metal-lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,26 @@ _load_dracut_dep
export METAL_LOG_DIR='/var/log/metal'
mkdir -p $METAL_LOG_DIR

##############################################################################
# constant: METAL_HASH
# constant: METAL_DOCS_URL
#
# METAL_HASH is the VCS hash of the commit that produced this library; the
# placeholder assigned below is auto-filled when this module is built into an
# OS package.
# METAL_DOCS_URL is derived from it, which is useful for printing URLs to
# documentation that are relevant to the library running in an initramFS.
METAL_HASH='@@metal-hash@@'
# Default to main when the placeholder was never filled in (running directly
# out of the repo) or when it was filled in with an empty value; otherwise the
# docs URL would end in a bare "/tree/".
if [[ -z ${METAL_HASH} || ${METAL_HASH} == *metal-hash* ]]; then
    METAL_HASH='main'
fi
export METAL_HASH
export METAL_DOCS_URL="https://github.com/Cray-HPE/dracut-metal-mdsquash/tree/${METAL_HASH}"

##############################################################################
# constant: METAL_DONE_FILE_PAVED
#
# This file path present a file that the wipe function creates when it is
# This file path represents a file that the wipe function creates when it is
# invoked. The existence of the file implies the wipe code has been invoked;
# the contents of the file can be interpreted to determine what the wipe
# function actually did (see func metal_paved).
Expand Down Expand Up @@ -169,7 +185,7 @@ metal_die() {
fi
type die
echo >&2 "metal_die: $*"
echo >&2 "GitHub/Docs: https://github.com/Cray-HPE/dracut-metal-mdsquash"
echo >&2 "GitHub/Docs: ${METAL_DOCS_URL}/README.adoc"
sleep 30 # Leaving time (30seconds) for console/log buffers to catch up.
if [ "$_reset" = 1 ]; then

Expand Down
26 changes: 14 additions & 12 deletions 90metalmdsquash/metal-md-lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -218,10 +218,10 @@ add_overlayfs() {
[ -f /tmp/metalovalimg.done ] && return
local mpoint=/metal/ovaldisk
mkdir -pv ${mpoint}
if ! mount -v -n -t xfs /dev/md/ROOT "$mpoint"; then
if ! mount -n -t xfs /dev/md/ROOT "$mpoint"; then

# try shasta-1.3 formatting or die.
mount -v -n -t ext4 /dev/md/ROOT "$mpoint" \
mount -n -t ext4 /dev/md/ROOT "$mpoint" \
|| metal_die "Failed to mount ${oval_drive_authority} as xfs or ext4"
fi

Expand All @@ -233,7 +233,7 @@ add_overlayfs() {
"${mpoint}/${live_dir}/${metal_overlayfs_id}" \
"${mpoint}/${live_dir}/${metal_overlayfs_id}/../ovlwork"
echo 1 > /tmp/metalovalimg.done && info 'OverlayFS is ready ...'
umount -v ${mpoint}
umount ${mpoint}
}

###############################################################################
Expand Down Expand Up @@ -261,13 +261,13 @@ fetch_sqfs() {

# Mount read-only to prevent harm to the device; we literally just need to pull the files off it.
mkdir -vp /tmp/source
mount -v -n -o ro -L "$metal_local_url_authority" /tmp/source || metal_die "Failed to mount $metal_local_url_authority from $metal_server"
mount -n -o ro -L "$metal_local_url_authority" /tmp/source || metal_die "Failed to mount $metal_local_url_authority from $metal_server"
(
set -e
cd "$1"
cp -pv "/tmp/source/${metal_local_dir#//}/${squashfs_file}" . && echo "copied ${squashfs_file} ... " > debug_log
) || warn 'Failed to copy ; may retry'
umount -v /tmp/source
umount /tmp/source
fi
if [ -f "$1/${squashfs_file}" ]; then
echo 1 > /tmp/metalsqfsimg.done
Expand Down Expand Up @@ -316,10 +316,10 @@ add_sqfs() {
info "URI host ${uri_host} responds ... "
fi
mkdir -pv $sqfs_store
if mount -v -n -t xfs /dev/md/SQFS $sqfs_store; then
if mount -n -t xfs /dev/md/SQFS $sqfs_store; then
mkdir -pv "$sqfs_store/$live_dir"
fetch_sqfs "$sqfs_store/$live_dir" || metal_die 'Failed to fetch squashFS into squashFS storage!'
umount -v $sqfs_store
umount $sqfs_store
else
# No RAID mount, issue warning, delete mount-point and return
metal_die "Failed to mount /dev/md/SQFS at $sqfs_store"
Expand Down Expand Up @@ -348,7 +348,7 @@ pave() {
# the original run.
if [ -f "$METAL_DONE_FILE_PAVED" ]; then
echo "${FUNCNAME[0]} already done" >>"$log"
echo "wipe done file already exists ($METAL_DONE_FILE_PAVED); not wiping disks"
echo "wipe 'done file' already exists ($METAL_DONE_FILE_PAVED); not wiping disks"
return 0
fi
{
Expand All @@ -361,17 +361,19 @@ pave() {
} >>"$log" 2>&1

if [ "$metal_nowipe" -ne 0 ]; then
echo "${FUNCNAME[0]} skipped: metal.no-wipe=${metal_nowipe}" >>$log
warn 'local storage device wipe [ safeguard: ENABLED ]'
warn 'local storage devices will not be wiped.'
warn "local storage devices will not be wiped (${METAL_DOCS_URL}#metalno-wipe)"
echo 0 > "$METAL_DONE_FILE_PAVED" && return 0
else
warn 'local storage device wipe [ safeguard: DISABLED ]'
warn "local storage devices WILL be wiped (${METAL_DOCS_URL}#metalno-wipe)"
fi
warn 'local storage device wipe commencing (USB devices are ignored) ...'
warn 'local storage device wipe commencing ...'
warn "local storage device wipe ignores USB devices and block devices smaller than [$metal_ignore_threshold] bytes."

warn 'nothing can be done to stop this except one one thing ...'
warn "... power this node off within the next [$metal_wipe_delay] seconds to prevent any and all operations ..."
warn "power this node off within the next [$metal_wipe_delay] second(s) to cancel."
warn "NOTE: this delay can be adjusted, see: ${METAL_DOCS_URL}#metalwipe-delay"
while [ "${metal_wipe_delay}" -ge 0 ]; do
[ "${metal_wipe_delay}" = 2 ] && unit='second' || unit='seconds'
sleep 1 && local metal_wipe_delay=$((${metal_wipe_delay} - 1)) && echo "${metal_wipe_delay} $unit"
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ rpm_package_source:
tar --transform 'flags=r;s,^,/${NAME}-${VERSION}/,' --exclude .git --exclude dist -cvjf $(SOURCE_PATH) .

rpm_build_source:
BUILD_METADATA=$(BUILD_METADATA) rpmbuild --nodeps -ts $(SOURCE_PATH) --define "_topdir $(BUILD_DIR)"
rpmbuild --nodeps -ts $(SOURCE_PATH) --define "_topdir $(BUILD_DIR)"

rpm_build:
BUILD_METADATA=$(BUILD_METADATA) rpmbuild --nodeps -ba $(SPEC_FILE) --define "_topdir $(BUILD_DIR)"
rpmbuild --nodeps -ba $(SPEC_FILE) --define "_topdir $(BUILD_DIR)"
140 changes: 92 additions & 48 deletions README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ In order to use this dracut module, you need:

. A local-attached-usb or remote server with a squashFS image.
. Physical block devices must be installed in your blade(s).
. Two physical disk of 0.5TiB or less (or the RAID must be overridden, see <<metal-mdsquash-customizations,metal mdsquash customization>>
. Two physical disks of 0.5TiB or less (or the RAID must be overridden, see <<_metal-mdsquash-customizations,metal mdsquash customization>>)

== Usage

Expand All @@ -30,7 +30,7 @@ metal.server=<URI> root=live:LABEL=SQFSRAID
----

The above snippet is the minimal cmdline necessary for this module to function. Additional options
are denoted throughout the <<parameters,module customization>> section.
are denoted throughout the <<_kernel_parameters,module customization>> section.

== URI Drivers

Expand Down Expand Up @@ -60,49 +60,128 @@ http://10.100.101.111/some/local/server

Other drivers, such as native `s3`, `scp`, and `ftp` could be added but are not currently implemented.

These drivers schemes are all defined by the rule generator, link:./90metalmdsquash/metal-genrules.sh[`metal-genrules.sh`].
These driver schemes are all defined by the rule generator, link:./90metalmdsquash/metal-genrules.sh[`metal-genrules.sh`^].

[#_kernel_parameters]
== Kernel Parameters

[#_metal-mdsquash-customizations]
=== metal-mdsquash customizations

==== `metal.debug`

Set `metal.debug=1` to enable debug output from only metal modules. This will verbosely print the creation of the RAIDs and fetching of the squashFS image. *This effectively runs all dracut-metal code with `set -x`*, while leaving the rest of dracut to its own debug level.
Set `metal.debug=1` to enable debug output from only metal modules. This will verbosely print the creation of the RAIDs and fetching of the squashFS image. *This effectively runs all dracut-metal code with `set -x`*, while leaving the rest of dracut to its own debug level.

- `Default: 0`

==== `metal.disks`

Specify the number of disks to use in the local RAID (see link:README.md#metalmd-level[`metal.md-level`] for changing the RAID type).
Specify the number of disks to use in the local RAID (see <<_metal_md_level>> for changing the RAID type).

- `Default: 2`

[#_metal_md_level]
==== `metal.md-level`

Change the level passed to mdadm for RAID creation, possible values are any value it takes.
Mileage varies, buyer beware this could dig a hole deeper.

- `Default: mirror`

NOTE: When `metal.disks=1` is set, a RAID array is still created but with only one member.
In this case, only mirror and stripe will produce

==== `metal.no-wipe`

If this is set to `metal.no-wipe=1`, then all destructive behavior is disabled. The metal modules will either use what they find or make 0 changes during boots. This is insurance, it should not be required. This is helpful for development, or for admins tracking old and new nodes.
Determines if the wipe function should run, `metal.no-wipe=0` will wipe block devices and make them ready for partitioning. `metal.no-wipe=1`
will disable this behavior.

- `Default: 0`

.Note that a warning will print with a timeout in which the user may power the node `off` to avoid a wipe. This timeout can be adjusted, see <<_metal_wipe_delay>>.

The following storage items are removed and/or prepared for partitioning as a raw disk:

. LVMs (specifically `'vg_name=~ceph*' and 'vg_name=~metal*'`)
** This removes any CEPH volumes
** Any volume prefixed with `metal` is considered a relative to this module and will be removed
** Volumes are removed with `vgremove`
. `/dev/md` devices
** MD Devices are stopped
** Magic bits erased
** Each member's superblocks are zeroed
. `/dev/sd` and `/dev/nvme` devices
** Magic bits erased
. Any/all USB devices are ignored
. Any/all devices smaller than `metal.min-disk-size*1024**3 bytes` are ignored (see <<_metal_min_disk_size>>)
. `partprobe` is invoked to update/notify the kernel of the partition changes
. Any LVMs that weren't on a device that was wiped will still exist, since only specific LVMs are targeted

.Example output of a wipe running
[source,text]
----
Warning: local storage device wipe [ safeguard: DISABLED ]
Warning: local storage devices WILL be wiped (https://github.com/Cray-HPE/dracut-metal-mdsquash/tree/7d303b3193619f642b1316ce2b1968ee1cc82a69#metalno-wipe)
Warning: local storage device wipe commencing ...
Warning: local storage device wipe ignores USB devices and block devices smaller than [17179869184] bytes.
Warning: nothing can be done to stop this except one one thing ...
Warning: power this node off within the next [5] second(s) to cancel.
Warning: NOTE: this delay can be adjusted, see: https://github.com/Cray-HPE/dracut-metal-mdsquash/tree/7d303b3193619f642b1316ce2b1968ee1cc82a69#metalwipe-delay
Found volume group "metalvg0" using metadata type lvm2
Found volume group "ceph-ec4a2c46-e0ab-4f89-b7dc-6c044ce9a24b" using metadata type lvm2
Found volume group "ceph-2c5c9402-7bc2-4a8c-8eba-028532b91d9f" using metadata type lvm2
Found volume group "ceph-a38bb9f7-99ef-4536-82cf-2550a406da38" using metadata type lvm2
Found volume group "ceph-c1e6018e-6a50-4b17-a15d-b387ae66b8a4" using metadata type lvm2
VG #PV #LV #SN Attr VSize VFree
ceph-2c5c9402-7bc2-4a8c-8eba-028532b91d9f 1 1 0 wz--n- <1.75t 0
ceph-a38bb9f7-99ef-4536-82cf-2550a406da38 1 1 0 wz--n- <1.75t 0
ceph-c1e6018e-6a50-4b17-a15d-b387ae66b8a4 1 1 0 wz--n- <447.13g 0
ceph-ec4a2c46-e0ab-4f89-b7dc-6c044ce9a24b 1 1 0 wz--n- <1.75t 0
metalvg0 1 3 0 wz--n- 279.14g 149.14g
Warning: removing all volume groups of name [vg_name=~ceph*]
Failed to clear hint file.
Logical volume "osd-block-a8c05059-d921-4546-884d-f63f606f966c" successfully removed
Volume group "ceph-ec4a2c46-e0ab-4f89-b7dc-6c044ce9a24b" successfully removed
Logical volume "osd-block-d70a9ddd-9b8c-42e0-98cb-5f5279dcef5a" successfully removed
Volume group "ceph-2c5c9402-7bc2-4a8c-8eba-028532b91d9f" successfully removed
Logical volume "osd-block-d2e9e4cf-c670-418f-847e-39ade3208d04" successfully removed
Volume group "ceph-a38bb9f7-99ef-4536-82cf-2550a406da38" successfully removed
Logical volume "osd-block-b6085667-54dc-4e01-810b-25c093a510dc" successfully removed
Volume group "ceph-c1e6018e-6a50-4b17-a15d-b387ae66b8a4" successfully removed
Warning: removing all volume groups of name [vg_name=~metal*]
Failed to clear hint file.
Logical volume "CEPHETC" successfully removed
Logical volume "CEPHVAR" successfully removed
Logical volume "CONTAIN" successfully removed
Volume group "metalvg0" successfully removed
Warning: local storage device wipe is targeting the following RAID(s): [/dev/md124 /dev/md125 /dev/md126 /dev/md127]
Warning: local storage device wipe is targeting the following block devices: [/dev/sda /dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf]
Warning: local storage disk wipe complete
Found the following disks for the main RAID array (qty. [2]): [sda sdb]
mdadm: size set to 487360K
mdadm: array /dev/md/BOOT started.
mdadm: size set to 23908352K
mdadm: array /dev/md/SQFS started.
mdadm: size set to 146352128K
mdadm: automatically enabling write-intent bitmap on large array
mdadm: array /dev/md/ROOT started.
mdadm: chunk size defaults to 512K
mdadm: array /dev/md/AUX started.
----

[#_metal_wipe_delay]
==== `metal.wipe-delay`

The number of seconds that the wipe function will wait to allow an administrator to cancel it (by powering the node off). See the source code in link:./90metalmdsquash/metal-md-lib.sh[`metal-md-lib.sh`] for minimum and maximum values.
The number of seconds that the wipe function will wait to allow an administrator to cancel it (by powering the node off). See the source code in link:./90metalmdsquash/metal-md-lib.sh[`metal-md-lib.sh`] for minimum and maximum values.

- `Default: 5`
- `Unit: Seconds`

==== `metal.ipv4`

By default, metal-dracut will use IPv4 to resolve the deployment server for the initial call-to-home and when downloading artifacts regardless if IPv6 networking is present in the environment. To disable this constraint, simply set `metal.ipv4=0` in the cmdline. Setting this to `0` will enable all `ping` and `curl` calls for calling-home and downloading artifacts to use *either* IPv6 or IPv4 on their own accord (e.g. if IPv6 exists, then `ping` and `curl` will prefer to use it by default). Presumably if IPv6 is desired and exists, then IPv6 DHCP/DNS and general TCP/IP connectivity is working.
By default, metal-dracut will use IPv4 to resolve the deployment server for the initial call-to-home and when downloading artifacts regardless if IPv6 networking is present in the environment. To disable this constraint, simply set `metal.ipv4=0` in the cmdline. Setting this to `0` will enable all `ping` and `curl` calls for calling-home and downloading artifacts to use *either* IPv6 or IPv4 on their own accord (e.g. if IPv6 exists, then `ping` and `curl` will prefer to use it by default). Presumably if IPv6 is desired and exists, then IPv6 DHCP/DNS and general TCP/IP connectivity is working.
Lastly, if IPv6 does not exist then toggling this value to `0` has no effect.

- `Default: 1`

==== `metal.sqfs-md-size`
Expand All @@ -129,6 +208,7 @@ Buyer beware this does not resize, this applies for new partitions.
- `Default: 150`
- `Unit: Gigabytes`

[#_metal_min_disk_size]
==== `metal.min-disk-size`

Sets the minimum size threshold when wiping and partitioning disks, anything `&lt;` this is left untouched.
Expand All @@ -138,7 +218,7 @@ Sets the minimum size threshold when wiping and partitioning disks, anything `&l

=== dmsquashlive customizations

reference: https://github.com/dracutdevs/dracut/blob/master/dracut.cmdline.7.asc#booting-live-images[dracut dmsquashlive cmdline]
reference: https://github.com/dracutdevs/dracut/blob/master/man/dracut.cmdline.7.asc#booting-live-images[dracut dmsquashlive cmdline^]

==== `rd.live.dir`

Expand Down Expand Up @@ -191,7 +271,7 @@ Specify the filename to refer to download.

=== dracut : standard customizations

reference: https://github.com/dracutdevs/dracut/blob/master/dracut.cmdline.7.asc#standard[dracut standard cmdline]
reference: https://github.com/dracutdevs/dracut/blob/master/man/dracut.cmdline.7.asc#standard[dracut standard cmdline^]

==== `rootfallback`

Expand All @@ -207,41 +287,5 @@ The idea of persistence is that changes _persist_ across reboots, when the state
changes it preserves information. For servers that boot images into memory (also known as live images),
an overlayFS is a common method for providing persistent storage.

=== Feature Toggles

Metal squashFS URL Dracut module has a few feature toggles, by default it is recommended to leave
them alone unless you must change them for your environment.

==== Toggling Persistence

Disable the overlayFS entirely by setting `rd.live.overlay=0`, this will cause a temporary overlay
to be created that exists in memory. A prompt may appear during boot to acknowledge the RAM overlayFS.

To disable it entirely, delete all `rd.live.overlay.*` options.

==== Toggling Read-Only OverlayFS

Setting `rd.live.readonly=1` will cause the next boot's persistent overlayFS to be mounted
as read-only. This has a different convention in overlayFS and will look differently on your
system pending certain toggles:

* either an
additional, non-persistent, writable snapshot overlay will be
stacked over a read-only snapshot, /dev/mapper/live-ro, of the
base filesystem with the persistent overlay,
* or a read-only loop
device, in the case of a writable rootfs.img,
* *(default)* or an OverlayFS
mount will use the persistent overlay directory linked at
/run/overlayfs-r as an additional lower layer along with the base
root filesystem and apply a transient, writable upper directory
overlay, in order to complete the booted root filesystem.

==== Toggling Resetting the Persistent OverlayFS on Boot

To cleanly reset the overlayFS, reboot the node with this kernel option:
`rd.live.overlay.reset=1`.

The OverlayFS is reset by recreating the image file if it doesn't exist, and then by wiping the image
file if it does exist. The wipe is controlled by dracut-native (dmsquash-live), the creation of
the image file is handled by this dracut module (metal-squashfs-url-dracut).
The overlayFS created by this dracut module is used by the dmsquash-live module, all dracut live image
kernel parameters should function alongside this module.
5 changes: 5 additions & 0 deletions dracut-metal-mdsquash.spec
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ Provides: %{module_name}
%setup -q

%build
%define hash %(git rev-parse --verify HEAD)
# Stamp the commit hash into the library only when one was actually resolved.
# The hash is quoted so that an empty value fails the test; unquoted, the test
# degrades to `[ -n ]`, which is always true, and sed would then replace the
# placeholder with nothing.
if [ -n "%{hash}" ]; then
echo "%{hash}"
sed -i 's,@@metal-hash@@,%{hash},g' %{module_name}/metal-lib.sh
fi

%install
%{__mkdir_p} %{buildroot}%{url_dracut_doc}
Expand Down

0 comments on commit 532fb4f

Please sign in to comment.