Skip to content

Commit

Permalink
Add XUnitLogChecker to log libraries dumps, do not enable for NativeA…
Browse files Browse the repository at this point in the history
…OT tests (#94868)

* Add XUnitLogChecker to log libraries dumps (#93906)

Libraries CI failures that generate dumps will now show the dump output in the console log itself.

* Change NativeAOT condition

* Pass the global property set to false in the post build step.

* Address suggestion:
- Avoid using yml extraHelixArguments.
- Add the new embedded SetCommands optional section in runner scripts.
- Set __IsXUnitLogCheckerSupported in tests.targets as SetScriptCommand items instead of directly in sendtohelixhelp.proj.

* Only add XUnitLogChecker as HelixPayload if its directory is found. Whether it was built or not has already been decided before, and the runner scripts know when to execute it.
  • Loading branch information
carlossanlop authored Nov 29, 2023
1 parent 0629552 commit 35b866c
Show file tree
Hide file tree
Showing 10 changed files with 170 additions and 124 deletions.
7 changes: 6 additions & 1 deletion Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@
<TestExclusionListTasksAssemblyPath>$([MSBuild]::NormalizePath('$(TestExclusionListTasksDir)', 'TestExclusionListTasks.dll'))</TestExclusionListTasksAssemblyPath>
<CoreCLRToolPath>$([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'coreclr', '$(TargetOS).$(TargetArchitecture).$(RuntimeConfiguration)'))</CoreCLRToolPath>
<ILAsmToolPath Condition="'$(DotNetBuildFromSource)' == 'true' or '$(BuildArchitecture)' == 's390x' or '$(BuildArchitecture)' == 'ppc64le'">$(CoreCLRToolPath)</ILAsmToolPath>

<WasmtimeDir Condition="'$(WasmtimeDir)' == '' and '$(WASMTIME_PATH)' != '' and Exists($(WASMTIME_PATH))">$(WASMTIME_PATH)</WasmtimeDir>
<WasmtimeDir Condition="'$(WasmtimeDir)' == ''">$([MSBuild]::NormalizeDirectory($(ArtifactsObjDir), 'wasmtime'))</WasmtimeDir>
<InstallWasmtimeForTests Condition="'$(InstallWasmtimeForTests)' == '' and !Exists($(WasmtimeDir))">true</InstallWasmtimeForTests>
Expand Down Expand Up @@ -332,6 +331,12 @@

<!-- this property is used by the SDK to pull in mono-based runtime packs -->
<UseMonoRuntime Condition="'$(UseMonoRuntime)' == '' and '$(RuntimeFlavor)' == 'Mono'">true</UseMonoRuntime>

<!-- For enabling the use of XUnitLogChecker in coreclr and libraries test runs. -->
<!-- TODO: Enable XUnitLogChecker for NativeAOT tests https://github.com/dotnet/runtime/issues/94722 -->
<IsXUnitLogCheckerSupported Condition="'$(IsXUnitLogCheckerSupported)' == ''">false</IsXUnitLogCheckerSupported>
<IsXUnitLogCheckerSupported Condition="'$(RuntimeFlavor)' == 'CoreCLR' and '$(TestNativeAot)' != 'true' and '$(RunNativeAotTestApps)' != 'true' and '$(TestSingleFile)' != 'true' and '$(TargetOS)' != 'browser' and '$(TargetOS)' != 'wasi' and '$(TargetOS)' != 'ios' and '$(TargetOS)' != 'iossimulator' and '$(TargetOS)' != 'tvos' and '$(TargetOS)' != 'tvossimulator' and '$(TargetOS)' != 'maccatalyst' and '$(TargetOS)' != 'android'">true</IsXUnitLogCheckerSupported>
<XUnitLogCheckerLibrariesOutDir>$([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'XUnitLogChecker'))</XUnitLogCheckerLibrariesOutDir>
</PropertyGroup>

<!-- Packaging -->
Expand Down
1 change: 1 addition & 0 deletions eng/pipelines/libraries/prepare-for-bin-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ steps:
ref/**
runtime/**
testhost/**
XUnitLogChecker/**
- task: CopyFiles@2
displayName: Prepare bin folders to publish
Expand Down
40 changes: 40 additions & 0 deletions eng/testing/RunnerTemplate.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ echo DOTNET_EnableExtraSuperPmiQueries=%DOTNET_EnableExtraSuperPmiQueries%
echo DOTNET_JitPath=%DOTNET_JitPath%
:skip_spmi_enable_collection

echo ========================= Begin custom configuration settings ==============================
[[SetCommandsEcho]]
[[SetCommands]]
echo ========================== End custom configuration settings ===============================

:: ========================= BEGIN Test Execution =============================
echo ----- start %DATE% %TIME% =============== To repro directly: =====================================================
echo pushd %EXECUTION_DIR%
Expand All @@ -92,6 +97,41 @@ if %_exit_code%==1 (
)
)
)

if NOT "%__IsXUnitLogCheckerSupported%"=="1" (
echo XUnitLogChecker not supported for this test case. Skipping.
GOTO SKIP_XUNITLOGCHECKER
)

echo ----- start =============== XUnitLogChecker Output =====================================================

set DOTNET_EXE=%RUNTIME_PATH%\dotnet.exe
set XUNITLOGCHECKER_DLL=%HELIX_CORRELATION_PAYLOAD%\XUnitLogChecker.dll
set XUNITLOGCHECKER_COMMAND=%DOTNET_EXE% --roll-forward Major %XUNITLOGCHECKER_DLL% --dumps-path %HELIX_DUMP_FOLDER%
set XUNITLOGCHECKER_EXIT_CODE=1

if NOT EXIST %DOTNET_EXE% (
echo dotnet.exe does not exist in the expected location: %DOTNET_EXE%
GOTO XUNITLOGCHECKER_END
) else if NOT EXIST %XUNITLOGCHECKER_DLL% (
echo XUnitLogChecker.dll does not exist in the expected location: %XUNITLOGCHECKER_DLL%
GOTO XUNITLOGCHECKER_END
)

echo %XUNITLOGCHECKER_COMMAND%
%XUNITLOGCHECKER_COMMAND%
set XUNITLOGCHECKER_EXIT_CODE=%ERRORLEVEL%

:XUNITLOGCHECKER_END

if %XUNITLOGCHECKER_EXIT_CODE% NEQ 0 (
set _exit_code=%XUNITLOGCHECKER_EXIT_CODE%
)

echo ----- end =============== XUnitLogChecker Output - exit code %XUNITLOGCHECKER_EXIT_CODE% ===============

:SKIP_XUNITLOGCHECKER

exit /b %_exit_code%
:: ========================= END Test Execution =================================

Expand Down
183 changes: 83 additions & 100 deletions eng/testing/RunnerTemplate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,80 +54,60 @@ exitcode_list[131]="SIGQUIT Ctrl-\ occurred. Core dumped."
exitcode_list[132]="SIGILL Illegal Instruction. Core dumped. Likely codegen issue."
exitcode_list[133]="SIGTRAP Breakpoint hit. Core dumped."
exitcode_list[134]="SIGABRT Abort. Managed or native assert, or runtime check such as heap corruption, caused call to abort(). Core dumped."
exitcode_list[135]="IGBUS Unaligned memory access. Core dumped."
exitcode_list[135]="IGBUS Unaligned memory access. Core dumped."
exitcode_list[136]="SIGFPE Bad floating point arguments. Core dumped."
exitcode_list[137]="SIGKILL Killed eg by kill"
exitcode_list[139]="SIGSEGV Illegal memory access. Deref invalid pointer, overrunning buffer, stack overflow etc. Core dumped."
exitcode_list[143]="SIGTERM Terminated. Usually before SIGKILL."
exitcode_list[159]="SIGSYS Bad System Call."

function print_info_from_core_file_using_lldb {
function move_core_file_to_temp_location {
local core_file_name=$1
local executable_name=$2
local plugin_path_name="$RUNTIME_PATH/shared/Microsoft.NETCore.App/9.9.9/libsosplugin.so"

# check for existence of lldb on the path
hash lldb 2>/dev/null || { echo >&2 "lldb was not found. Unable to print core file."; return; }
# Append the dmp extension to ensure XUnitLogChecker finds it
local new_location=$HELIX_DUMP_FOLDER/$core_file_name.dmp

# pe, clrstack, and dumpasync are defined in libsosplugin.so
if [ ! -f $plugin_path_name ]; then
echo $plugin_path_name cannot be found.
return
fi

echo ----- start =============== lldb Output =====================================================
echo Printing managed exceptions, managed call stacks, and async state machines.
lldb -O "settings set target.exec-search-paths $RUNTIME_PATH" -o "plugin load $plugin_path_name" -o "clrthreads -managedexception" -o "pe -nested" -o "clrstack -all -a -f" -o "dumpasync -fields -stacks -roots" -o "quit" --core $core_file_name $executable_name
echo ----- end =============== lldb Output =======================================================
}

function print_info_from_core_file_using_gdb {
local core_file_name=$1
local executable_name=$2

# Check for the existence of GDB on the path
hash gdb 2>/dev/null || { echo >&2 "GDB was not found. Unable to print core file."; return; }
echo "Copying dump file '$core_file_name' to '$new_location'"
cp $core_file_name $new_location

echo ----- start =============== GDB Output =====================================================
# Open the dump in GDB and print the stack from each thread. We can add more
# commands here if desired.
echo printing native stack.
gdb --batch -ex "thread apply all bt full" -ex "quit" $executable_name $core_file_name
echo ----- end =============== GDB Output =======================================================
# Delete the old one
rm $core_file_name
}

function print_info_from_core_file {
local core_file_name=$1
local executable_name=$RUNTIME_PATH/$2

if ! [ -e $executable_name ]; then
echo "Unable to find executable $executable_name"
return
elif ! [ -e $core_file_name ]; then
echo "Unable to find core file $core_file_name"
return
xunitlogchecker_exit_code=0
function invoke_xunitlogchecker {
local dump_folder=$1

total_dumps=$(find $dump_folder -name "*.dmp" | wc -l)

if [[ $total_dumps > 0 ]]; then
echo "Total dumps found in $dump_folder: $total_dumps"
xunitlogchecker_file_name="$HELIX_CORRELATION_PAYLOAD/XUnitLogChecker.dll"
dotnet_file_name="$RUNTIME_PATH/dotnet"

if [[ ! -f $dotnet_file_name ]]; then
echo "'$dotnet_file_name' was not found. Unable to run XUnitLogChecker."
xunitlogchecker_exit_code=1
elif [[ ! -f $xunitlogchecker_file_name ]]; then
echo "'$xunitlogchecker_file_name' was not found. Unable to print dump file contents."
xunitlogchecker_exit_code=2
elif [[ ! -d $dump_folder ]]; then
echo "The dump directory '$dump_folder' does not exist."
else
echo "Executing XUnitLogChecker in $dump_folder..."
cmd="$dotnet_file_name --roll-forward Major $xunitlogchecker_file_name --dumps-path $dump_folder"
echo "$cmd"
$cmd
xunitlogchecker_exit_code=$?
fi
else
echo "No dumps found in $dump_folder."
fi
echo "Printing info from core file $core_file_name"
print_info_from_core_file_using_gdb $core_file_name $executable_name
print_info_from_core_file_using_lldb $core_file_name $executable_name
}

function copy_core_file_to_temp_location {
local core_file_name=$1

local storage_location="/tmp/coredumps"

# Create the directory (this shouldn't fail even if it already exists).
mkdir -p $storage_location

local new_location=$storage_location/core.$RANDOM

echo "Copying core file $core_file_name to $new_location in case you need it."
cp $core_file_name $new_location
}

# ========================= BEGIN Core File Setup ============================
if [[ "$(uname -s)" == "Darwin" ]]; then
system_name="$(uname -s)"
if [[ $system_name == "Darwin" ]]; then
# On OS X, we will enable core dump generation only if there are no core
# files already in /cores/ at this point. This is being done to prevent
# inadvertently flooding the CI machines with dumps.
Expand All @@ -140,21 +120,11 @@ if [[ "$(uname -s)" == "Darwin" ]]; then
# https://github.com/dotnet/core-eng/issues/15597
ulimit -c 0
fi

elif [[ "$(uname -s)" == "Linux" ]]; then
# On Linux, we'll enable core file generation unconditionally, and if a dump
# is generated, we will print some useful information from it and delete the
# dump immediately.

if [ -e /proc/self/coredump_filter ]; then
# Include memory in private and shared file-backed mappings in the dump.
# This ensures that we can see disassembly from our shared libraries when
# inspecting the contents of the dump. See 'man core' for details.
echo -n 0x3F > /proc/self/coredump_filter
fi

ulimit -c unlimited
fi

export DOTNET_DbgEnableMiniDump=1
export DOTNET_EnableCrashReport=1
export DOTNET_DbgMiniDumpName=$HELIX_DUMP_FOLDER/coredump.%d.dmp
# ========================= END Core File Setup ==============================

# ========================= BEGIN support for SuperPMI collection ==============================
Expand All @@ -171,7 +141,7 @@ if [ ! -z $spmi_enable_collection ]; then
fi
mkdir -p $spmi_collect_dir
export spmi_file_extension=so
if [[ "$(uname -s)" == "Darwin" ]]; then
if [[ $system_name == "Darwin" ]]; then
export spmi_file_extension=dylib
fi
export SuperPMIShimLogPath=$spmi_collect_dir
Expand All @@ -193,6 +163,11 @@ if [ ! -z $spmi_enable_collection ]; then
fi
# ========================= END support for SuperPMI collection ==============================

echo ========================= Begin custom configuration settings ==============================
[[SetCommandsEcho]]
[[SetCommands]]
echo ========================== End custom configuration settings ===============================

# ========================= BEGIN Test Execution =============================
echo ----- start $(date) =============== To repro directly: =====================================================
echo pushd $EXECUTION_DIR
Expand Down Expand Up @@ -220,51 +195,59 @@ if [[ $test_exitcode -ne 0 ]]; then
echo ulimit -c value: $(ulimit -c)
fi

if [[ "$(uname -s)" == "Linux" && $test_exitcode -ne 0 ]]; then
if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then

# For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint
if [[ $test_exitcode -ne 1 ]]; then
dmesg | tail -50
fi

have_sleep=$(which sleep)
if [ -x "$have_sleep" ]; then
echo Waiting a few seconds for any dump to be written..
sleep 10s
fi
fi

if [[ $system_name == "Linux" && $test_exitcode -ne 0 ]]; then
echo cat /proc/sys/kernel/core_pattern: $(cat /proc/sys/kernel/core_pattern)
echo cat /proc/sys/kernel/core_uses_pid: $(cat /proc/sys/kernel/core_uses_pid)
echo cat /proc/sys/kernel/coredump_filter: $(cat /proc/sys/kernel/coredump_filter)

echo Looking around for any Linux dump..

# Depending on distro/configuration, the core files may either be named "core"
# or "core.<PID>" by default. We read /proc/sys/kernel/core_uses_pid to
# determine which it is.
core_name_uses_pid=0
if [[ -e /proc/sys/kernel/core_uses_pid && "1" == $(cat /proc/sys/kernel/core_uses_pid) ]]; then
core_name_uses_pid=1
fi

# The osx dumps are too large to egress the machine
echo Looking around for any Linux dumps...

if [[ "$core_name_uses_pid" == "1" ]]; then
# We don't know what the PID of the process was, so let's look at all core
# files whose name matches core.NUMBER
echo Looking for files matching core.* ...
for f in core.*; do
[[ $f =~ core.[0-9]+ ]] && print_info_from_core_file "$f" "dotnet" && copy_core_file_to_temp_location "$f" && rm "$f"
echo "Looking for files matching core.* ..."
for f in $(find . -name "core.*"); do
[[ $f =~ core.[0-9]+ ]] && move_core_file_to_temp_location "$f"
done
elif [ -f core ]; then
echo found a dump named core in $EXECUTION_DIR !
print_info_from_core_file "core" "dotnet"
copy_core_file_to_temp_location "core"
rm "core"
else
echo ... found no dump in $PWD
fi

if [ -f core ]; then
move_core_file_to_temp_location "core"
fi
fi

if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then
# For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint
if [[ $test_exitcode -ne 1 ]]; then
dmesg | tail -50
fi

fi

if [[ -z "$__IsXUnitLogCheckerSupported" ]]; then
echo "The '__IsXUnitLogCheckerSupported' env var is not set."
elif [[ "$__IsXUnitLogCheckerSupported" != "1" ]]; then
echo "XUnitLogChecker not supported for this test case. Skipping."
else
echo ----- start =============== XUnitLogChecker Output =====================================================

invoke_xunitlogchecker "$HELIX_DUMP_FOLDER"

if [[ $xunitlogchecker_exit_code -ne 0 ]]; then
test_exitcode=$xunitlogchecker_exit_code
fi
echo ----- end =============== XUnitLogChecker Output - exit code $xunitlogchecker_exit_code ===========================
fi

popd >/dev/null
# ======================== END Core File Inspection ==========================
# The helix work item should not exit with non-zero if tests ran and produced results
Expand Down
Loading

0 comments on commit 35b866c

Please sign in to comment.