From e710b6eb3775f1c269e5ae82737ace449710771a Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 11 Sep 2024 02:19:59 +0200 Subject: [PATCH] MINOR: [CI][C++] Enable core dumps and stack traces in Linux/macOS jobs (#43937) ### Rationale for this change In https://github.com/apache/arrow/pull/43936 I noticed that core dumps were not written out for crashing C++ tests. One problem is that, by default, Ubuntu hosts pipe core dumps to `apport`, but it is not available inside containers. Another is that the `ulimit` must be set in the host, not in the container. In addition, this PR restores automatic traceback generation when running C++ tests, on Linux and macOS jobs. ### Are these changes tested? Manually by introducing a spurious segfault and running Docker containers. ### Are there any user-facing changes? No. Lead-authored-by: Antoine Pitrou Co-authored-by: Antoine Pitrou Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/cpp.yml | 5 ++- .github/workflows/dev.yml | 3 +- .github/workflows/integration.yml | 1 + .github/workflows/java_jni.yml | 4 ++- .github/workflows/js.yml | 3 +- .github/workflows/python.yml | 3 +- .github/workflows/r.yml | 6 ++-- .github/workflows/ruby.yml | 3 +- .github/workflows/swift.yml | 3 +- ci/docker/fedora-39-cpp.dockerfile | 1 + ci/docker/ubuntu-20.04-cpp-minimal.dockerfile | 1 + ci/docker/ubuntu-22.04-cpp-minimal.dockerfile | 1 + ci/docker/ubuntu-24.04-cpp-minimal.dockerfile | 1 + ci/scripts/util_enable_core_dumps.sh | 33 +++++++++++++++++++ cpp/build-support/run-test.sh | 23 ++++++++----- dev/tasks/docker-tests/github.cuda.yml | 1 + dev/tasks/docker-tests/github.linux.yml | 1 + dev/tasks/python-wheels/github.linux.yml | 1 + dev/tasks/r/github.packages.yml | 3 +- docker-compose.yml | 4 +-- 20 files changed, 70 insertions(+), 31 deletions(-) create mode 100644 ci/scripts/util_enable_core_dumps.sh diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 4a01d2f8e3aab..f5c8b6a7201be 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -155,8 +155,7 @@ jobs: run: | # GH-40558: reduce ASLR to avoid ASAN/LSAN crashes sudo sysctl -w vm.mmap_rnd_bits=28 - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run ${{ matrix.image }} - name: Docker Push if: >- @@ -272,7 +271,7 @@ jobs: shell: bash run: | sudo sysctl -w kern.coredump=1 - sudo sysctl -w kern.corefile=core.%N.%P + sudo sysctl -w kern.corefile=/tmp/core.%N.%P ulimit -c unlimited # must enable within the same shell ci/scripts/cpp_test.sh $(pwd) $(pwd)/build diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 1cc8d993498b6..3879a045fd239 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -67,8 +67,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run -e GITHUB_ACTIONS=true ubuntu-lint - name: Docker Push if: >- diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index ecf89bff8f600..2d19b1e59b27a 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -101,6 +101,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: > + source ci/scripts/util_enable_core_dumps.sh archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ -e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \ diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index f2ecc801dc724..e730a5bf3e672 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -81,7 +81,9 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run java-jni-manylinux-2014 + run: | + source ci/scripts/util_enable_core_dumps.sh + archery docker run java-jni-manylinux-2014 - name: Docker Push if: >- success() && diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index 17b57c42b62f6..9ab4edf0851cd 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -66,8 +66,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run debian-js - name: Docker Push if: >- diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 6e83b727593b4..45efd305aa8f6 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -119,8 +119,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run ${{ matrix.image }} - name: Docker Push if: >- diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index bd1631db4f617..92e0e63fb7ea5 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -158,8 +158,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh # Setting a non-default and non-probable Marquesas French Polynesia time # it has both with a .45 offset and very very few people who live there. archery docker run -e TZ=MART -e ARROW_R_FORCE_TESTS=${{ matrix.force-tests }} ubuntu-r @@ -218,8 +217,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh # Don't set a TZ here to test that case. These builds will have the following warning in them: # System has not been booted with systemd as init system (PID 1). Can't operate. # Failed to connect to bus: Host is down diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index c4a7f31f4a94c..05b7b317ffd96 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -95,8 +95,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run \ -e ARROW_FLIGHT=ON \ -e ARROW_FLIGHT_SQL=ON \ diff --git a/.github/workflows/swift.yml b/.github/workflows/swift.yml index 86eb113dfc833..87aa5cb83f714 100644 --- a/.github/workflows/swift.yml +++ b/.github/workflows/swift.yml @@ -65,8 +65,7 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run ubuntu-swift - name: Docker Push if: >- diff --git a/ci/docker/fedora-39-cpp.dockerfile b/ci/docker/fedora-39-cpp.dockerfile index 33d11823094ce..2ac5afe7b91f6 100644 --- a/ci/docker/fedora-39-cpp.dockerfile +++ b/ci/docker/fedora-39-cpp.dockerfile @@ -34,6 +34,7 @@ RUN dnf update -y && \ curl-devel \ gcc \ gcc-c++ \ + gdb \ gflags-devel \ git \ glog-devel \ diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile index 4d867a448c994..1b342df596c9d 100644 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -29,6 +29,7 @@ RUN apt-get update -y -q && \ ccache \ cmake \ curl \ + gdb \ git \ libssl-dev \ libcurl4-openssl-dev \ diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index f26cad51f0983..ce31c457e909e 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -29,6 +29,7 @@ RUN apt-get update -y -q && \ ccache \ cmake \ curl \ + gdb \ git \ libssl-dev \ libcurl4-openssl-dev \ diff --git a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile index 125bc7ba46a81..a1fd178a2c754 100644 --- a/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp-minimal.dockerfile @@ -29,6 +29,7 @@ RUN apt-get update -y -q && \ ccache \ cmake \ curl \ + gdb \ git \ libssl-dev \ libcurl4-openssl-dev \ diff --git a/ci/scripts/util_enable_core_dumps.sh b/ci/scripts/util_enable_core_dumps.sh new file mode 100644 index 0000000000000..09f8d2d727099 --- /dev/null +++ b/ci/scripts/util_enable_core_dumps.sh @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: this script is not marked executable as it should be source'd +# for `ulimit` to take effect. + +set -e + +platform=$(uname) + +if [ "${platform}" = "Linux" ]; then + # We need to override `core_pattern` because + # 1. the original setting may reference apport, which is not available under + # most Docker containers; + # 2. we want to write the core file in a well-known directory. + sudo sysctl -w kernel.core_pattern="/tmp/core.%e.%p" +fi + +ulimit -c unlimited diff --git a/cpp/build-support/run-test.sh b/cpp/build-support/run-test.sh index 8e42438a23c1c..55e3fe0980749 100755 --- a/cpp/build-support/run-test.sh +++ b/cpp/build-support/run-test.sh @@ -121,12 +121,15 @@ function print_coredumps() { # patterns must be set with prefix `core.{test-executable}*`: # # In case of macOS: - # sudo sysctl -w kern.corefile=core.%N.%P + # sudo sysctl -w kern.corefile=/tmp/core.%N.%P # On Linux: - # sudo sysctl -w kernel.core_pattern=core.%e.%p + # sudo sysctl -w kernel.core_pattern=/tmp/core.%e.%p # # and the ulimit must be increased: # ulimit -c unlimited + # + # If the tests are run in a Docker container, the instructions are slightly + # different: see the 'Coredumps' comment section in `docker-compose.yml`. # filename is truncated to the first 15 characters in case of linux, so limit # the pattern for the first 15 characters @@ -134,19 +137,21 @@ function print_coredumps() { FILENAME=$(echo ${FILENAME} | cut -c-15) PATTERN="^core\.${FILENAME}" - COREFILES=$(ls | grep $PATTERN) + COREFILES=$(ls /tmp | grep $PATTERN) if [ -n "$COREFILES" ]; then - echo "Found core dump, printing backtrace:" - for COREFILE in $COREFILES; do + COREPATH="/tmp/${COREFILE}" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + echo "Running '${TEST_EXECUTABLE}' produced core dump at '${COREPATH}', printing backtrace:" # Print backtrace if [ "$(uname)" == "Darwin" ]; then - lldb -c "${COREFILE}" --batch --one-line "thread backtrace all -e true" + lldb -c "${COREPATH}" --batch --one-line "thread backtrace all -e true" else - gdb -c "${COREFILE}" $TEST_EXECUTABLE -ex "thread apply all bt" -ex "set pagination 0" -batch + gdb -c "${COREPATH}" $TEST_EXECUTABLE -ex "thread apply all bt" -ex "set pagination 0" -batch fi - # Remove the coredump, regenerate it via running the test case directly - rm "${COREFILE}" + echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" + # Remove the coredump, it can be regenerated via running the test case directly + rm "${COREPATH}" done fi } diff --git a/dev/tasks/docker-tests/github.cuda.yml b/dev/tasks/docker-tests/github.cuda.yml index 8c04da8a91a4f..d03b3657afc53 100644 --- a/dev/tasks/docker-tests/github.cuda.yml +++ b/dev/tasks/docker-tests/github.cuda.yml @@ -38,6 +38,7 @@ jobs: env: {{ macros.github_set_sccache_envvars()|indent(8) }} run: | + source arrow/ci/scripts/util_enable_core_dumps.sh archery docker run \ -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \ {{ flags|default("") }} \ diff --git a/dev/tasks/docker-tests/github.linux.yml b/dev/tasks/docker-tests/github.linux.yml index 28d3203c1ed48..cd2923a50d6df 100644 --- a/dev/tasks/docker-tests/github.linux.yml +++ b/dev/tasks/docker-tests/github.linux.yml @@ -38,6 +38,7 @@ jobs: run: | # GH-40558: reduce ASLR to avoid TSAN crashing sudo sysctl -w vm.mmap_rnd_bits=28 + source arrow/ci/scripts/util_enable_core_dumps.sh archery docker run \ -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \ {{ flags|default("") }} \ diff --git a/dev/tasks/python-wheels/github.linux.yml b/dev/tasks/python-wheels/github.linux.yml index f9df27ba3175b..d9dbef82a948e 100644 --- a/dev/tasks/python-wheels/github.linux.yml +++ b/dev/tasks/python-wheels/github.linux.yml @@ -59,6 +59,7 @@ jobs: - name: Test wheel shell: bash run: | + source arrow/ci/scripts/util_enable_core_dumps.sh archery docker run python-wheel-manylinux-test-imports archery docker run python-wheel-manylinux-test-unittests diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 0539eae6cc9d9..db6955b92d1e0 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -140,8 +140,7 @@ jobs: UBUNTU: {{ '"${{ matrix.ubuntu }}"' }} {{ macros.github_set_sccache_envvars()|indent(8) }} run: | - sudo sysctl -w kernel.core_pattern="core.%e.%p" - ulimit -c unlimited + source ci/scripts/util_enable_core_dumps.sh archery docker run \ -e EXTRA_CMAKE_FLAGS="{{ '${{ matrix.extra-cmake-flags }}' }}" \ {{ '${{ matrix.os }}' }}-cpp-static diff --git a/docker-compose.yml b/docker-compose.yml index 8721eef524a19..6d9b738d8da35 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -38,11 +38,11 @@ # WARNING: setting this will affect the host machine. # # Linux host: -# $ sudo sysctl -w kernel.core_pattern=core.%e.%p +# $ sudo sysctl -w kernel.core_pattern=/tmp/core.%e.%p # # macOS host running Docker for Mac (won't persist between restarts): # $ screen ~/Library/Containers/com.docker.docker/Data/vms/0/tty -# # echo "core.%e.%p" > /proc/sys/kernel/core_pattern +# # echo "/tmp/core.%e.%p" > /proc/sys/kernel/core_pattern # # The setup attempts to generate coredumps by default, but the correct paths # above must be set. In order to disable the coredump generation set