Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix rasdaemon crash during bootup on AMD CPU #19023

Merged
merged 2 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions build_debian.sh
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,6 @@ EOF
# override tcpdump profile to allow tcpdump access TACACS config file.
sudo cp files/apparmor/usr.bin.tcpdump $FILESYSTEM_ROOT/etc/apparmor.d/local/usr.bin.tcpdump

# Install rasdaemon into the target root filesystem when building for amd64.
if [[ "$CONFIGURED_ARCH" == "amd64" ]]; then
    ## Pre-install the fundamental packages for amd64 (x86)
    sudo LANG=C DEBIAN_FRONTEND=noninteractive \
        chroot $FILESYSTEM_ROOT apt-get -y install rasdaemon
fi

## Set /etc/shadow permissions to -rw-------.
sudo LANG=c chroot $FILESYSTEM_ROOT chmod 600 /etc/shadow

Expand Down
5 changes: 5 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,11 @@ sudo cp $IMAGE_CONFIGS/corefile_uploader/core_analyzer.rc.json $FILESYSTEM_ROOT_
sudo chmod og-rw $FILESYSTEM_ROOT_ETC_SONIC/core_analyzer.rc.json

if [[ $CONFIGURED_ARCH == amd64 ]]; then
# Install rasdaemon package
# NOTE: This package can be installed directly from Debian once we move to trixie
sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/rasdaemon_*.deb || \
sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install -f

# Rasdaemon service configuration. Use timer to start rasdaemon with a delay for better fast/warm boot performance
sudo cp $IMAGE_CONFIGS/rasdaemon/rasdaemon.timer $FILESYSTEM_ROOT_USR_LIB_SYSTEMD_SYSTEM
sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT systemctl disable rasdaemon.service
Expand Down
10 changes: 10 additions & 0 deletions rules/rasdaemon.dep
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

# Dependency description for the rasdaemon package.

# Source directory of the package, as registered in rules/rasdaemon.mk.
SPATH := $($(RASDAEMON)_SRC_PATH)

# Files the package depends on: the common file lists, this package's own
# rule files, and every file git tracks under SPATH.
DEP_FILES := $(SONIC_COMMON_FILES_LIST) rules/rasdaemon.mk rules/rasdaemon.dep \
             $(SONIC_COMMON_BASE_FILES_LIST) \
             $(shell git ls-files $(SPATH))

# Per-package settings, keyed by the package file name held in $(RASDAEMON).
$(RASDAEMON)_CACHE_MODE := GIT_CONTENT_SHA
$(RASDAEMON)_DEP_FLAGS  := $(SONIC_COMMON_FLAGS_LIST)
$(RASDAEMON)_DEP_FILES  := $(DEP_FILES)

8 changes: 8 additions & 0 deletions rules/rasdaemon.mk
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# rasdaemon package

# Version of the package; exported so that sub-makes (src/rasdaemon/Makefile
# reads it) see the same value.
export RASDAEMON_VERSION = 0.6.8-1

# File name of the .deb to produce, and the directory holding its build recipe.
RASDAEMON = rasdaemon_$(RASDAEMON_VERSION)_$(CONFIGURED_ARCH).deb
$(RASDAEMON)_SRC_PATH = $(SRC_PATH)/rasdaemon

# Add the package to the SONIC_MAKE_DEBS list.
SONIC_MAKE_DEBS += $(RASDAEMON)
1 change: 1 addition & 0 deletions slave.mk
Original file line number Diff line number Diff line change
Expand Up @@ -1391,6 +1391,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \
$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_PLATFORM_API_PY3)) \
$(if $(findstring y,$(PDDF_SUPPORT)),$(addprefix $(PYTHON_WHEELS_PATH)/,$(PDDF_PLATFORM_API_BASE_PY2))) \
$(if $(findstring y,$(PDDF_SUPPORT)),$(addprefix $(PYTHON_WHEELS_PATH)/,$(PDDF_PLATFORM_API_BASE_PY3))) \
$(if $(findstring amd64,$(CONFIGURED_ARCH)),$(addprefix $(IMAGE_DISTRO_DEBS_PATH)/,$(RASDAEMON))) \
$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_YANG_MODELS_PY3)) \
$(addprefix $(PYTHON_WHEELS_PATH)/,$(SONIC_CTRMGRD)) \
$(addprefix $(FILES_PATH)/,$($(SONIC_CTRMGRD)_FILES)) \
Expand Down
3 changes: 3 additions & 0 deletions sonic-slave-bookworm/Dockerfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,9 @@ RUN apt-get update && apt-get install -y \
qemu-kvm \
libvirt-clients \
python3-pexpect \
# For rasdaemon build
libsqlite3-dev \
libgettextpo-dev \
{%- endif %}
# For ntp
autogen \
Expand Down
38 changes: 38 additions & 0 deletions src/rasdaemon/0001-Check-CPUs-online-not-configured.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
From f1ea76375281001cdf4a048c1a4a24d86c6fbe48 Mon Sep 17 00:00:00 2001
From: Zeph / Liz Loss-Cutler-Hull <warp-spam_git@aehallh.com>
Date: Sun, 9 Jul 2023 04:57:19 -0700
Subject: [PATCH] Check CPUs online, not configured.

When the number of CPUs detected is greater than the number of CPUs in
the system, rasdaemon will crash when it receives some events.

Looking deeper, we also fail to use the poll method for similar reasons
in this case.

All of this can be prevented by checking to see how many CPUs are
currently online (sysconf(_SC_NPROCESSORS_ONLN)) instead of how many
CPUs the current kernel was configured to support
(sysconf(_SC_NPROCESSORS_CONF)).

For the kernel side of the discussion, see https://lore.kernel.org/lkml/CAM6Wdxft33zLeeXHhmNX5jyJtfGTLiwkQSApc=10fqf+rQh9DA@mail.gmail.com/T/
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
---
ras-events.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ras-events.c b/ras-events.c
index a82dab2..5935163 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -350,7 +350,7 @@ static void parse_ras_data(struct pthread_data *pdata, struct kbuffer *kbuf,

static int get_num_cpus(struct ras_events *ras)
{
- return sysconf(_SC_NPROCESSORS_CONF);
+ return sysconf(_SC_NPROCESSORS_ONLN);
#if 0
char fname[MAX_PATH + 1];
int num_cpus = 0;
--
2.36.1

23 changes: 23 additions & 0 deletions src/rasdaemon/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Builds the rasdaemon .deb from the Debian packaging repository with a local
# patch applied.
#
# The whole recipe runs in a single bash invocation (.ONESHELL) so that
# pushd/popd carry across lines; -e aborts the recipe on the first failure.
.ONESHELL:
SHELL = /bin/bash
.SHELLFLAGS += -e

MAIN_TARGET = rasdaemon_$(RASDAEMON_VERSION)_$(CONFIGURED_ARCH).deb

# Extra dpkg-buildpackage options, set only for cross builds.
ifeq ($(CROSS_BUILD_ENVIRON), y)
RASDAEMON_CROSS_OPTS = -a$(CONFIGURED_ARCH) -Pcross,nocheck
else
RASDAEMON_CROSS_OPTS =
endif

$(DEST)/$(MAIN_TARGET): $(DEST)/% :
	# Start from a clean tree
	rm -rf rasdaemon/

	# Checkout Repository
	git clone https://salsa.debian.org/tai271828/rasdaemon.git -b debian/$(RASDAEMON_VERSION)

	pushd ./rasdaemon
	# Patch
	git apply ../0001-Check-CPUs-online-not-configured.patch
	# Build the binary package, unsigned
	dpkg-buildpackage -rfakeroot -b -us -uc $(RASDAEMON_CROSS_OPTS) -j$(SONIC_CONFIG_MAKE_JOBS) --admindir $(SONIC_DPKG_ADMINDIR)
	popd

	# Move the built package (the pattern stem) into the destination directory
	mv $* $(DEST)/
Loading