From d84fe3db20a3d14aa8c3fee5b145b6afa48a5b53 Mon Sep 17 00:00:00 2001 From: Vivek Reddy Date: Tue, 14 May 2024 22:14:53 +0000 Subject: [PATCH 1/2] Fix RAS daemon crash during boot on AMD CPU Build rasdaemon 0.6.8-1 from source with the upstream "Check CPUs online, not configured" patch applied, instead of installing the stock Debian package via apt-get in build_debian.sh. Signed-off-by: Vivek Reddy --- build_debian.sh | 6 --- .../build_templates/sonic_debian_extension.j2 | 5 +++ rules/rasdaemon.dep | 10 +++++ rules/rasdaemon.mk | 8 ++++ slave.mk | 1 + sonic-slave-bookworm/Dockerfile.j2 | 3 ++ ...001-Check-CPUs-online-not-configured.patch | 38 +++++++++++++++++++ src/rasdaemon/Makefile | 23 +++++++++++ 8 files changed, 88 insertions(+), 6 deletions(-) create mode 100644 rules/rasdaemon.dep create mode 100644 rules/rasdaemon.mk create mode 100644 src/rasdaemon/0001-Check-CPUs-online-not-configured.patch create mode 100644 src/rasdaemon/Makefile diff --git a/build_debian.sh b/build_debian.sh index 508c5dffa8f6..86e5cbbb3977 100755 --- a/build_debian.sh +++ b/build_debian.sh @@ -419,12 +419,6 @@ EOF # override tcpdump profile to allow tcpdump access TACACS config file. sudo cp files/apparmor/usr.bin.tcpdump $FILESYSTEM_ROOT/etc/apparmor.d/local/usr.bin.tcpdump -if [[ $CONFIGURED_ARCH == amd64 ]]; then -## Pre-install the fundamental packages for amd64 (x86) -sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install \ - rasdaemon -fi - ## Set /etc/shadow permissions to -rw-------. 
sudo LANG=c chroot $FILESYSTEM_ROOT chmod 600 /etc/shadow diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 77ecf9509acb..7336d5b73b1c 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -497,6 +497,11 @@ sudo cp $IMAGE_CONFIGS/corefile_uploader/core_analyzer.rc.json $FILESYSTEM_ROOT_ sudo chmod og-rw $FILESYSTEM_ROOT_ETC_SONIC/core_analyzer.rc.json if [[ $CONFIGURED_ARCH == amd64 ]]; then + # Install rasdaemon package + # NOTE: Can be installed from debian directly when we move to trixie + sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/rasdaemon_*.deb || \ + sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install -f + # Rasdaemon service configuration. Use timer to start rasdaemon with a delay for better fast/warm boot performance sudo cp $IMAGE_CONFIGS/rasdaemon/rasdaemon.timer $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT systemctl disable rasdaemon.service diff --git a/rules/rasdaemon.dep b/rules/rasdaemon.dep new file mode 100644 index 000000000000..fdc82536e67b --- /dev/null +++ b/rules/rasdaemon.dep @@ -0,0 +1,10 @@ + +SPATH := $($(RASDAEMON)_SRC_PATH) +DEP_FILES := $(SONIC_COMMON_FILES_LIST) rules/rasdaemon.mk rules/rasdaemon.dep +DEP_FILES += $(SONIC_COMMON_BASE_FILES_LIST) +DEP_FILES += $(shell git ls-files $(SPATH)) + +$(INITRAMFS_TOOLS)_CACHE_MODE := GIT_CONTENT_SHA +$(INITRAMFS_TOOLS)_DEP_FLAGS := $(SONIC_COMMON_FLAGS_LIST) +$(INITRAMFS_TOOLS)_DEP_FILES := $(DEP_FILES) + diff --git a/rules/rasdaemon.mk b/rules/rasdaemon.mk new file mode 100644 index 000000000000..82486d6a151c --- /dev/null +++ b/rules/rasdaemon.mk @@ -0,0 +1,8 @@ +# rasdaemon package + +RASDAEMON_VERSION = 0.6.8-1 +export RASDAEMON_VERSION + +RASDAEMON = rasdaemon_$(RASDAEMON_VERSION)_$(CONFIGURED_ARCH).deb +$(RASDAEMON)_SRC_PATH = $(SRC_PATH)/rasdaemon 
+SONIC_MAKE_DEBS += $(RASDAEMON) diff --git a/slave.mk b/slave.mk index eb537e740527..eb43749f144d 100644 --- a/slave.mk +++ b/slave.mk @@ -1391,6 +1391,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \ $(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY3)) \ $(if $(findstring y,$(PDDF_SUPPORT)),$(addprefix $(PYTHON_WHEELS_PATH)/,$(PDDF_PLATFORM_API_BASE_PY2))) \ $(if $(findstring y,$(PDDF_SUPPORT)),$(addprefix $(PYTHON_WHEELS_PATH)/,$(PDDF_PLATFORM_API_BASE_PY3))) \ + $(if $(findstring amd64,$(CONFIGURED_ARCH)),$(addprefix $(IMAGE_DISTRO_DEBS_PATH)/,$(RASDAEMON))) \ $(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_YANG_MODELS_PY3)) \ $(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_CTRMGRD)) \ $(addprefix $(FILES_PATH)/,$($(SONIC_CTRMGRD)_FILES)) \ diff --git a/sonic-slave-bookworm/Dockerfile.j2 b/sonic-slave-bookworm/Dockerfile.j2 index 18a64a620cfb..3ffc41ca20fb 100644 --- a/sonic-slave-bookworm/Dockerfile.j2 +++ b/sonic-slave-bookworm/Dockerfile.j2 @@ -335,6 +335,9 @@ RUN apt-get update && apt-get install -y \ qemu-kvm \ libvirt-clients \ python3-pexpect \ +# For rasdaemon build + libsqlite3-dev \ + libgettextpo-dev \ {%- endif %} # For ntp autogen \ diff --git a/src/rasdaemon/0001-Check-CPUs-online-not-configured.patch b/src/rasdaemon/0001-Check-CPUs-online-not-configured.patch new file mode 100644 index 000000000000..c7c703fdb0d4 --- /dev/null +++ b/src/rasdaemon/0001-Check-CPUs-online-not-configured.patch @@ -0,0 +1,38 @@ +From f1ea76375281001cdf4a048c1a4a24d86c6fbe48 Mon Sep 17 00:00:00 2001 +From: Zeph / Liz Loss-Cutler-Hull +Date: Sun, 9 Jul 2023 04:57:19 -0700 +Subject: [PATCH] Check CPUs online, not configured. + +When the number of CPUs detected is greater than the number of CPUs in +the system, rasdaemon will crash when it receives some events. + +Looking deeper, we also fail to use the poll method for similar reasons +in this case. 
+ +All of this can be prevented by checking to see how many CPUs are +currently online (sysconf(_SC_NPROCESSORS_ONLN)) instead of how many +CPUs the current kernel was configured to support +(sysconf(_SC_NPROCESSORS_CONF)). + +For the kernel side of the discussion, see https://lore.kernel.org/lkml/CAM6Wdxft33zLeeXHhmNX5jyJtfGTLiwkQSApc=10fqf+rQh9DA@mail.gmail.com/T/ +Signed-off-by: Mauro Carvalho Chehab +--- + ras-events.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/ras-events.c b/ras-events.c +index a82dab2..5935163 100644 +--- a/ras-events.c ++++ b/ras-events.c +@@ -350,7 +350,7 @@ static void parse_ras_data(struct pthread_data *pdata, struct kbuffer *kbuf, + + static int get_num_cpus(struct ras_events *ras) + { +- return sysconf(_SC_NPROCESSORS_CONF); ++ return sysconf(_SC_NPROCESSORS_ONLN); + #if 0 + char fname[MAX_PATH + 1]; + int num_cpus = 0; +-- +2.36.1 + diff --git a/src/rasdaemon/Makefile b/src/rasdaemon/Makefile new file mode 100644 index 000000000000..15e6bbebe809 --- /dev/null +++ b/src/rasdaemon/Makefile @@ -0,0 +1,23 @@ +.ONESHELL: +SHELL = /bin/bash +.SHELLFLAGS += -e + +MAIN_TARGET = rasdaemon_$(RASDAEMON_VERSION)_$(CONFIGURED_ARCH).deb + +$(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% : + rm -rf rasdaemon/ + + # Checkout Repository + git clone https://salsa.debian.org/tai271828/rasdaemon.git -b debian/$(RASDAEMON_VERSION) + + pushd ./rasdaemon + # Patch + git apply ../0001-Check-CPUs-online-not-configured.patch +ifeq ($(CROSS_BUILD_ENVIRON), y) + dpkg-buildpackage -rfakeroot -b -us -uc -a$(CONFIGURED_ARCH) -Pcross,nocheck -j$(SONIC_CONFIG_MAKE_JOBS) --admindir $(SONIC_DPKG_ADMINDIR) +else + dpkg-buildpackage -rfakeroot -b -us -uc -j$(SONIC_CONFIG_MAKE_JOBS) --admindir $(SONIC_DPKG_ADMINDIR) +endif + popd + + mv $* $(DEST)/ From 3a90f102de9ea3c2294a9615d9c5d55c119d4e7b Mon Sep 17 00:00:00 2001 From: Vivek Date: Mon, 20 May 2024 17:13:07 -0700 Subject: [PATCH 2/2] Fix rasdaemon dep file: set cache variables on $(RASDAEMON) instead of $(INITRAMFS_TOOLS) --- rules/rasdaemon.dep | 6 +++--- 
1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rules/rasdaemon.dep b/rules/rasdaemon.dep index fdc82536e67b..40621f3054ba 100644 --- a/rules/rasdaemon.dep +++ b/rules/rasdaemon.dep @@ -4,7 +4,7 @@ DEP_FILES := $(SONIC_COMMON_FILES_LIST) rules/rasdaemon.mk rules/rasdaemon.dep DEP_FILES += $(SONIC_COMMON_BASE_FILES_LIST) DEP_FILES += $(shell git ls-files $(SPATH)) -$(INITRAMFS_TOOLS)_CACHE_MODE := GIT_CONTENT_SHA -$(INITRAMFS_TOOLS)_DEP_FLAGS := $(SONIC_COMMON_FLAGS_LIST) -$(INITRAMFS_TOOLS)_DEP_FILES := $(DEP_FILES) +$(RASDAEMON)_CACHE_MODE := GIT_CONTENT_SHA +$(RASDAEMON)_DEP_FLAGS := $(SONIC_COMMON_FLAGS_LIST) +$(RASDAEMON)_DEP_FILES := $(DEP_FILES)