From 067671a1c2372871bcad56660920859b2e685985 Mon Sep 17 00:00:00 2001 From: Ben Cressey Date: Thu, 18 Nov 2021 03:09:57 +0000 Subject: [PATCH] release: use systemd-repart for partition resizing Switch from our `growpart` tool to `systemd-repart` to resize the data partition, in order to support a unified root+data image. `growpart` uses the `gptman` crate, which calls the BLKRRPART ioctl to tell the kernel to re-read the partition table. This call fails if the device contains mounted partitions. `systemd-repart` uses the newer BLKPG ioctl, which manipulates the kernel's view of individual partitions. This works even if the root filesystem is present on the same device and already mounted. It also avoids the need to handle the partition symlink going away and coming back, since udev does not get the change event that triggers this. The two tools differ in how much free space is left on the device after the last partition is resized. `growpart` ends the partition one sector before the last 1 MiB boundary, while `systemd-repart` ends it just before the GPT label. Both tools run on every boot. To avoid problems on downgrade after a newer release resizes the data filesystem beyond where the older release will end the partition, we constrain `systemd-repart` to leave the older number of free sectors. Since `/local` can be mounted during the resize operation, we can use a real mount unit for it, which greatly simplifies the dependencies, and allows us to decouple the "prepare" logic from "resize" logic. 
Signed-off-by: Ben Cressey --- packages/release/local.mount | 14 ++++++++ packages/release/prepare-local.service | 42 ++++++---------------- packages/release/release-repart-local.conf | 12 +++++++ packages/release/release.spec | 16 +++++++-- packages/release/repart-local.service | 29 +++++++++++++++ 5 files changed, 78 insertions(+), 35 deletions(-) create mode 100644 packages/release/local.mount create mode 100644 packages/release/release-repart-local.conf create mode 100644 packages/release/repart-local.service diff --git a/packages/release/local.mount b/packages/release/local.mount new file mode 100644 index 00000000000..0d1c468bac3 --- /dev/null +++ b/packages/release/local.mount @@ -0,0 +1,14 @@ +[Unit] +Description=Local Directory (/local) +DefaultDependencies=no +Conflicts=umount.target +Before=local-fs.target umount.target + +[Mount] +What=/dev/disk/by-partlabel/BOTTLEROCKET-DATA +Where=/local +Type=ext4 +Options=defaults,noatime,nosuid,nodev + +[Install] +WantedBy=preconfigured.target diff --git a/packages/release/prepare-local.service b/packages/release/prepare-local.service index a41f79973a5..167873d9c50 100644 --- a/packages/release/prepare-local.service +++ b/packages/release/prepare-local.service @@ -1,44 +1,20 @@ [Unit] Description=Prepare Local Directory (/local) DefaultDependencies=no - -# We need udev to create /dev/disk/by-partlabel/BOTTLEROCKET-DATA first. -Wants=dev-disk-by\x2dpartlabel-BOTTLEROCKET\x2dDATA.device -After=dev-disk-by\x2dpartlabel-BOTTLEROCKET\x2dDATA.device +RequiresMountsFor=/local [Service] Type=oneshot -Environment=BOTTLEROCKET_DATA=/dev/disk/by-partlabel/BOTTLEROCKET-DATA Environment=LOCAL_DIR=/local -# To "grow" the partition, we delete it and recreate it at the larger size, then -# write it back to the device. udevd observes the write via inotify, and tells -# the kernel to reload the partition table. This causes the partition link to be -# deleted and then recreated. 
-ExecStart=/usr/sbin/growpart ${BOTTLEROCKET_DATA} - -# If the GPT label was not already at the end of the disk, the first pass will -# write it there, but any additional sectors beyond the original position were -# not included in the resized partition. Now that the kernel has reloaded the -# partition table, the second pass can find and use those sectors. -ExecStart=/usr/sbin/growpart ${BOTTLEROCKET_DATA} - -# The above note means we can't have a "normal" mount unit here, because it would -# depend on the link, and would immediately transition to the failed state when the -# link is removed. systemd will create local.mount for us as a side effect. -ExecStart=/usr/bin/mount \ - -o defaults,noatime,nosuid,nodev \ - ${BOTTLEROCKET_DATA} ${LOCAL_DIR} - -# After the mount is active, we grow the filesystem to fill the resized partition, -# and ensure that it has the directories we need for subsequent mounts. -ExecStart=/usr/lib/systemd/systemd-growfs ${LOCAL_DIR} +# Create the directories we need for our bind mounts. ExecStart=/usr/bin/mkdir -p ${LOCAL_DIR}/var ${LOCAL_DIR}/opt ${LOCAL_DIR}/mnt # Create the directories we need to set up a read-write overlayfs for the kernel -# development sources and the kernel modules -ExecStart=/usr/bin/rm -rf ${LOCAL_DIR}/var/lib/kernel-devel \ - %{LOCAL_DIR}/var/lib/kernel-modules +# development sources and kernel modules. +ExecStart=/usr/bin/rm -rf \ + ${LOCAL_DIR}/var/lib/kernel-devel \ + ${LOCAL_DIR}/var/lib/kernel-modules ExecStart=/usr/bin/mkdir -p \ ${LOCAL_DIR}/var/lib/kernel-devel/.overlay/lower \ ${LOCAL_DIR}/var/lib/kernel-devel/.overlay/upper \ @@ -48,11 +24,13 @@ ExecStart=/usr/bin/mkdir -p \ # Create the directories we need to set up a read-write overlayfs for any CNI # plugin binaries. 
-ExecStart=/usr/bin/rm -rf ${LOCAL_DIR}/opt/cni ${LOCAL_DIR}/var/lib/cni-plugins +ExecStart=/usr/bin/rm -rf \ + ${LOCAL_DIR}/opt/cni \ + ${LOCAL_DIR}/var/lib/cni-plugins ExecStart=/usr/bin/mkdir -p \ ${LOCAL_DIR}/opt/cni/bin \ ${LOCAL_DIR}/var/lib/cni-plugins/.overlay/upper \ - ${LOCAL_DIR}/var/lib/cni-plugins/.overlay/work \ + ${LOCAL_DIR}/var/lib/cni-plugins/.overlay/work RemainAfterExit=true StandardError=journal+console diff --git a/packages/release/release-repart-local.conf b/packages/release/release-repart-local.conf new file mode 100644 index 00000000000..c026191367a --- /dev/null +++ b/packages/release/release-repart-local.conf @@ -0,0 +1,12 @@ +[Partition] +# This is the partition type UUID for BOTTLEROCKET-DATA, which will be resized +# to fill the remaining sectors on the disk where it resides. +Type=626f7474-6c65-6474-6861-726d61726b73 + +# We want the partition to end on the last 1 MiB boundary before the end of +# the disk, to match the historical implementation. Assuming the disk itself is +# an even multiple of MiBs in size, and using 512 byte sectors as an example, +# we need 33 sectors for the GPT label in the last MiB, and therefore want 2015 +# sectors left, or 1031680 bytes. The repart tool expects a multiple of 4096, +# which is (1031680 - (1031680 % 4096)), or 1028096 bytes. 
+PaddingMinBytes=1028096 diff --git a/packages/release/release.spec b/packages/release/release.spec index 6cfb4c7e5c8..e1a38b9f65c 100644 --- a/packages/release/release.spec +++ b/packages/release/release.spec @@ -7,6 +7,7 @@ Summary: Bottlerocket release License: Apache-2.0 OR MIT Source11: nsswitch.conf +Source96: release-repart-local.conf Source97: release-sysctl.conf Source98: release-systemd-system.conf Source99: release-tmpfiles.conf @@ -31,6 +32,7 @@ Source1008: var-lib-bottlerocket.mount Source1009: etc-cni.mount Source1010: mnt.mount Source1012: opt-cni-bin.mount +Source1013: local.mount # CD-ROM mount & associated udev rules Source1015: media-cdrom.mount @@ -45,6 +47,7 @@ Source1023: lib-modules.mount.in # Mounts that require helper programs Source1040: prepare-boot.service Source1041: prepare-local.service +Source1042: repart-local.service # Services for kdump support Source1060: capture-kernel-dump.service @@ -101,6 +104,9 @@ install -p -m 0644 %{S:11} %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir} install -d %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/wicked/ifconfig install -p -m 0644 %{S:1000} %{buildroot}%{_cross_factorydir}%{_cross_sysconfdir}/wicked/ifconfig +install -d %{buildroot}%{_cross_libdir}/repart.d +install -p -m 0644 %{S:96} %{buildroot}%{_cross_libdir}/repart.d/80-local.conf + install -d %{buildroot}%{_cross_sysctldir} install -p -m 0644 %{S:97} %{buildroot}%{_cross_sysctldir}/80-release.conf @@ -117,9 +123,9 @@ EOF install -d %{buildroot}%{_cross_unitdir} install -p -m 0644 \ - %{S:1001} %{S:1002} %{S:1003} %{S:1004} %{S:1005} \ - %{S:1006} %{S:1007} %{S:1008} %{S:1009} %{S:1010} %{S:1011} %{S:1012} \ - %{S:1015} %{S:1040} %{S:1041} %{S:1060} %{S:1061} %{S:1062} %{S:1080} \ + %{S:1001} %{S:1002} %{S:1003} %{S:1004} %{S:1005} %{S:1006} %{S:1007} \ + %{S:1008} %{S:1009} %{S:1010} %{S:1011} %{S:1012} %{S:1013} %{S:1015} \ + %{S:1040} %{S:1041} %{S:1042} %{S:1060} %{S:1061} %{S:1062} %{S:1080} \ %{buildroot}%{_cross_unitdir} 
install -d %{buildroot}%{_cross_unitdir}/systemd-tmpfiles-setup.service.d @@ -162,6 +168,8 @@ ln -s %{_cross_unitdir}/preconfigured.target %{buildroot}%{_cross_unitdir}/defau %{_cross_sysctldir}/80-release.conf %{_cross_tmpfilesdir}/release.conf %{_cross_libdir}/os-release +%dir %{_cross_libdir}/repart.d +%{_cross_libdir}/repart.d/80-local.conf %{_cross_libdir}/systemd/system.conf.d/80-release.conf %{_cross_unitdir}/configured.target %{_cross_unitdir}/preconfigured.target @@ -174,12 +182,14 @@ ln -s %{_cross_unitdir}/preconfigured.target %{buildroot}%{_cross_unitdir}/defau %{_cross_unitdir}/load-crash-kernel.service %{_cross_unitdir}/prepare-boot.service %{_cross_unitdir}/prepare-local.service +%{_cross_unitdir}/repart-local.service %{_cross_unitdir}/var.mount %{_cross_unitdir}/opt.mount %{_cross_unitdir}/mnt.mount %{_cross_unitdir}/etc-cni.mount %{_cross_unitdir}/opt-cni-bin.mount %{_cross_unitdir}/media-cdrom.mount +%{_cross_unitdir}/local.mount %{_cross_unitdir}/*-lower.mount %{_cross_unitdir}/*-kernels.mount %{_cross_unitdir}/*-licenses.mount diff --git a/packages/release/repart-local.service b/packages/release/repart-local.service new file mode 100644 index 00000000000..bbcb18f9214 --- /dev/null +++ b/packages/release/repart-local.service @@ -0,0 +1,29 @@ +[Unit] +Description=Resize Data Partition +DefaultDependencies=no +Conflicts=shutdown.target +Wants=dev-disk-by\x2dpartlabel-BOTTLEROCKET\x2dDATA.device +After=dev-disk-by\x2dpartlabel-BOTTLEROCKET\x2dDATA.device + +# Ensure the device is mounted first, to avoid racing with the unit that tries +# to mount it since the symlink can disappear if the partition is resized. +RequiresMountsFor=/local + +[Service] +Type=oneshot + +# Resize the partition, whether or not it resides on the same disk as /. +ExecStart=/usr/bin/systemd-repart --dry-run=no /dev/disk/by-partlabel/BOTTLEROCKET-DATA + +# Grow the filesystem to fill the partition. 
Doing this in another unit could +# introduce a race if the underlying block device is not ready after resizing. +ExecStart=/usr/lib/systemd/systemd-growfs /local + +RemainAfterExit=true +StandardError=journal+console + +# systemd-repart returns 77 if there's no existing GPT partition table +SuccessExitStatus=77 + +[Install] +WantedBy=local-fs.target