# GH-32570: [C++] Fix ExecBatchBuilder exceeding the buffer boundary when
# appending consecutive tail rows with the same id (#35022)
# Workflow file for this run.
# This file contains bidirectional Unicode text that may be interpreted or
# compiled differently than what appears below. To review, open the file in an
# editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
name: C++

# Trigger on pushes and pull requests that touch the C++ sources or the CI
# files listed below. The two path lists are intentionally identical.
on:
  push:
    paths:
      - '.github/workflows/cpp.yml'
      - 'ci/docker/**'
      - 'ci/scripts/cpp_*'
      - 'ci/scripts/install_azurite.sh'
      - 'ci/scripts/install_gcs_testbench.sh'
      - 'ci/scripts/install_minio.sh'
      - 'ci/scripts/msys2_*'
      - 'ci/scripts/util_*'
      - 'cpp/**'
      - 'docker-compose.yml'
      - 'format/Flight.proto'
  pull_request:
    paths:
      - '.github/workflows/cpp.yml'
      - 'ci/docker/**'
      - 'ci/scripts/cpp_*'
      - 'ci/scripts/install_azurite.sh'
      - 'ci/scripts/install_gcs_testbench.sh'
      - 'ci/scripts/install_minio.sh'
      - 'ci/scripts/msys2_*'
      - 'ci/scripts/util_*'
      - 'cpp/**'
      - 'docker-compose.yml'
      - 'format/Flight.proto'

# Runs are grouped by repository, PR head branch (or commit SHA when there is
# no head ref) and workflow name; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

permissions:
  contents: read

# Environment shared by every job in this workflow.
env:
  # Quoted so that YAML 1.1 tooling does not read the value as a boolean.
  ARROW_ENABLE_TIMING_TESTS: "OFF"
  DOCKER_VOLUME_PREFIX: ".docker/"
jobs:
  # Linux builds executed inside Docker images through `archery docker run`.
  # Each matrix entry selects an image, toolchain versions and a runner.
  docker:
    name: ${{ matrix.title }}
    runs-on: ${{ matrix.runs-on }}
    # Skip pull requests whose title contains "WIP".
    if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
    timeout-minutes: 75
    strategy:
      fail-fast: false
      matrix:
        include:
          - arch: amd64
            clang-tools: "14"
            image: conda-cpp
            llvm: "14"
            runs-on: ubuntu-latest
            simd-level: AVX2
            title: AMD64 Conda C++ AVX2
            ubuntu: "22.04"
          - arch: amd64
            clang-tools: "14"
            image: ubuntu-cpp-sanitizer
            llvm: "14"
            runs-on: ubuntu-latest
            title: AMD64 Ubuntu 22.04 C++ ASAN UBSAN
            ubuntu: "22.04"
          - arch: arm64v8
            clang-tools: "10"
            image: ubuntu-cpp
            llvm: "10"
            runs-on: ["self-hosted", "arm", "linux"]
            title: ARM64 Ubuntu 20.04 C++
            ubuntu: "20.04"
    env:
      ARCH: ${{ matrix.arch }}
      # Only the first matrix entry defines simd-level; for the other entries
      # this expression expands to an empty string.
      ARROW_SIMD_LEVEL: ${{ matrix.simd-level }}
      CLANG_TOOLS: ${{ matrix.clang-tools }}
      LLVM: ${{ matrix.llvm }}
      UBUNTU: ${{ matrix.ubuntu }}
    steps:
      - name: Checkout Arrow
        uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac  # v4.0.0
        with:
          fetch-depth: 0
          submodules: recursive
      - name: Cache Docker Volumes
        uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8  # v3.3.1
        with:
          path: .docker
          key: ${{ matrix.image }}-${{ hashFiles('cpp/**') }}
          restore-keys: ${{ matrix.image }}-
      - name: Setup Python
        run: |
          sudo apt update
          sudo apt install -y --no-install-recommends python3 python3-dev python3-pip
      - name: Setup Archery
        run: python3 -m pip install -e dev/archery[docker]
      - name: Execute Docker Build
        env:
          ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
          ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
        run: |
          # Set the core file name pattern and lift the core size limit
          # before running the build.
          sudo sysctl -w kernel.core_pattern="core.%e.%p"
          ulimit -c unlimited
          archery docker run ${{ matrix.image }}
      - name: Docker Push
        # Push images only after a successful build, and only for pushes to
        # the main branch of apache/arrow.
        if: >-
          success() &&
          github.event_name == 'push' &&
          github.repository == 'apache/arrow' &&
          github.ref_name == 'main'
        env:
          ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
          ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
        # A failed push must not fail the job.
        continue-on-error: true
        run: archery docker push ${{ matrix.image }}
# Job: list the CMake presets and run the minimal build example in Docker.
  build-example:
    name: C++ Minimal Build Example
    runs-on: ubuntu-latest
    # Skip pull requests whose title contains "WIP".
    if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
    timeout-minutes: 45
    steps:
      - name: Checkout Arrow
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: recursive
      - name: Check CMake presets
        run: |
          cd cpp
          cmake --list-presets
      - name: Run minimal example
        run: |
          cd cpp/examples/minimal_build
          # Use the Compose V2 plugin; the standalone `docker-compose` (v1)
          # binary is no longer shipped on GitHub-hosted Ubuntu runners.
          docker compose run --rm minimal
# Job: native macOS build and test using Homebrew dependencies and ccache.
  macos:
    # NOTE(review): the title says "AMD64 macOS 12" but `macos-latest` is a
    # floating label; confirm that the runner it resolves to still matches
    # this title and the /usr/local prefix used below.
    name: AMD64 macOS 12 C++
    runs-on: macos-latest
    # Skip pull requests whose title contains "WIP".
    if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
    timeout-minutes: 75
    # ON values are quoted so that YAML 1.1 tooling does not read them as
    # booleans; the environment still receives the literal string.
    env:
      ARROW_AZURE: "ON"
      ARROW_BUILD_TESTS: "ON"
      ARROW_DATASET: "ON"
      ARROW_FLIGHT: "ON"
      ARROW_GANDIVA: "ON"
      ARROW_GCS: "ON"
      ARROW_HDFS: "ON"
      ARROW_HOME: /usr/local
      ARROW_JEMALLOC: "ON"
      ARROW_ORC: "ON"
      ARROW_PARQUET: "ON"
      ARROW_S3: "ON"
      ARROW_SUBSTRAIT: "ON"
      ARROW_WITH_BROTLI: "ON"
      ARROW_WITH_BZ2: "ON"
      ARROW_WITH_LZ4: "ON"
      # GH-36013 disabling opentelemetry here because we can't
      # get the patched version from conda
      # ARROW_WITH_OPENTELEMETRY: ON
      ARROW_WITH_SNAPPY: "ON"
      ARROW_WITH_ZLIB: "ON"
      ARROW_WITH_ZSTD: "ON"
      GTest_SOURCE: BUNDLED
    steps:
      - name: CPU Info
        run: |
          sysctl -a | grep cpu
          sysctl -a | grep "hw.optional"
      - name: Checkout Arrow
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: recursive
      - name: Install Dependencies
        run: |
          brew bundle --file=cpp/Brewfile
      - name: Install MinIO
        run: |
          $(brew --prefix bash)/bin/bash \
            ci/scripts/install_minio.sh latest /usr/local
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted: an unquoted 3.9 is a YAML float, not a version string.
          python-version: "3.9"
      - name: Install Google Cloud Storage Testbench
        run: ci/scripts/install_gcs_testbench.sh default
      - name: Install Azurite Storage Emulator
        run: ci/scripts/install_azurite.sh
      - name: Setup ccache
        run: |
          ci/scripts/ccache_setup.sh
      - name: ccache info
        id: ccache-info
        run: |
          # Expose the ccache directory as a step output for the cache step.
          echo "cache-dir=$(ccache --get-config cache_dir)" >> "$GITHUB_OUTPUT"
      - name: Cache ccache
        uses: actions/cache@v3
        with:
          path: ${{ steps.ccache-info.outputs.cache-dir }}
          key: cpp-ccache-macos-${{ hashFiles('cpp/**') }}
          restore-keys: cpp-ccache-macos-
      - name: Build
        run: |
          ci/scripts/cpp_build.sh $(pwd) $(pwd)/build
      - name: Test
        shell: bash
        run: |
          sudo sysctl -w kern.coredump=1
          sudo sysctl -w kern.corefile=core.%N.%P
          ulimit -c unlimited  # must enable within the same shell
          ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
# Job: Windows build with the Visual Studio 2019 toolchain and Ninja.
  windows:
    name: ${{ matrix.title }}
    runs-on: ${{ matrix.os }}
    # Skip pull requests whose title contains "WIP".
    if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
    timeout-minutes: 60
    strategy:
      fail-fast: false
      matrix:
        os:
          - windows-2019
        # Attaches simd-level and title to the windows-2019 entry above.
        include:
          - os: windows-2019
            simd-level: AVX2
            title: AMD64 Windows 2019 C++17 AVX2
    # ON/OFF values are quoted so that YAML 1.1 tooling does not read them as
    # booleans; the environment still receives the literal string.
    env:
      ARROW_BOOST_USE_SHARED: "OFF"
      ARROW_BUILD_BENCHMARKS: "ON"
      ARROW_BUILD_SHARED: "ON"
      ARROW_BUILD_STATIC: "OFF"
      ARROW_BUILD_TESTS: "ON"
      ARROW_DATASET: "ON"
      ARROW_FLIGHT: "OFF"
      ARROW_HDFS: "ON"
      ARROW_HOME: /usr
      ARROW_JEMALLOC: "OFF"
      ARROW_MIMALLOC: "ON"
      ARROW_ORC: "ON"
      ARROW_PARQUET: "ON"
      ARROW_SIMD_LEVEL: ${{ matrix.simd-level }}
      ARROW_SUBSTRAIT: "ON"
      ARROW_USE_GLOG: "OFF"
      ARROW_VERBOSE_THIRDPARTY_BUILD: "OFF"
      ARROW_WITH_BROTLI: "OFF"
      ARROW_WITH_BZ2: "OFF"
      ARROW_WITH_LZ4: "OFF"
      ARROW_WITH_OPENTELEMETRY: "OFF"
      ARROW_WITH_SNAPPY: "ON"
      ARROW_WITH_ZLIB: "ON"
      ARROW_WITH_ZSTD: "ON"
      BOOST_SOURCE: BUNDLED
      CMAKE_CXX_STANDARD: "17"
      CMAKE_GENERATOR: Ninja
      CMAKE_INSTALL_LIBDIR: bin
      CMAKE_INSTALL_PREFIX: /usr
      CMAKE_UNITY_BUILD: "ON"
    steps:
      - name: Disable Crash Dialogs
        # Sets the Windows Error Reporting DontShowUI registry value to 1.
        run: |
          reg add `
            "HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" `
            /v DontShowUI `
            /t REG_DWORD `
            /d 1 `
            /f
      - name: Installed Packages
        run: choco list
      - name: Install Dependencies
        run: choco install -y --no-progress openssl
      - name: Checkout Arrow
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: recursive
      - name: Download Timezone Database
        shell: bash
        run: ci/scripts/download_tz_database.sh
      - name: Install ccache
        shell: bash
        run: |
          ci/scripts/install_ccache.sh 4.6.3 /usr
      - name: Setup ccache
        shell: bash
        run: |
          ci/scripts/ccache_setup.sh
      - name: ccache info
        id: ccache-info
        shell: bash
        run: |
          # Expose the ccache directory as a step output for the cache step.
          echo "cache-dir=$(ccache --get-config cache_dir)" >> "$GITHUB_OUTPUT"
      - name: Cache ccache
        uses: actions/cache@v3
        with:
          path: ${{ steps.ccache-info.outputs.cache-dir }}
          key: cpp-ccache-windows-${{ env.CACHE_VERSION }}-${{ hashFiles('cpp/**') }}
          restore-keys: cpp-ccache-windows-${{ env.CACHE_VERSION }}-
        env:
          # We can invalidate the current cache by updating this.
          CACHE_VERSION: "2022-09-13"
      - name: Build
        shell: cmd
        # vcvarsall.bat is called first, then the build script runs under
        # bash from the same cmd session; $(pwd) is expanded by bash.
        run: |
          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
          bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build"
      - name: Test
        shell: bash
        run: |
          # For ORC
          export TZDIR=/c/msys64/usr/share/zoneinfo
          ci/scripts/cpp_test.sh $(pwd) $(pwd)/build
# Job: Windows builds under MSYS2, one matrix entry per MSYS2 environment.
  windows-mingw:
    name: AMD64 Windows MinGW ${{ matrix.msystem_upper }} C++
    runs-on: windows-2019
    # Skip pull requests whose title contains "WIP".
    if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
    # Build may take 1h+ without cache.
    timeout-minutes: 120
    strategy:
      fail-fast: false
      matrix:
        # msystem_upper selects the MSYS2 environment and appears in the job
        # title; msystem_lower is used for install prefixes and cache keys.
        include:
          - msystem_lower: mingw32
            msystem_upper: MINGW32
          - msystem_lower: mingw64
            msystem_upper: MINGW64
          - msystem_lower: clang64
            msystem_upper: CLANG64
    # ON/OFF values are quoted so that YAML 1.1 tooling does not read them as
    # booleans; the environment still receives the literal string.
    env:
      ARROW_BUILD_SHARED: "ON"
      ARROW_BUILD_STATIC: "OFF"
      ARROW_BUILD_TESTS: "ON"
      ARROW_BUILD_TYPE: release
      ARROW_DATASET: "ON"
      ARROW_FLIGHT: "ON"
      ARROW_FLIGHT_SQL: "ON"
      ARROW_GANDIVA: "ON"
      ARROW_GCS: "ON"
      ARROW_HDFS: "OFF"
      ARROW_HOME: /${{ matrix.msystem_lower}}
      ARROW_JEMALLOC: "OFF"
      ARROW_PARQUET: "ON"
      ARROW_S3: "ON"
      ARROW_SUBSTRAIT: "ON"
      ARROW_USE_GLOG: "OFF"
      ARROW_VERBOSE_THIRDPARTY_BUILD: "OFF"
      ARROW_WITH_BROTLI: "ON"
      ARROW_WITH_BZ2: "ON"
      ARROW_WITH_LZ4: "ON"
      ARROW_WITH_OPENTELEMETRY: "OFF"
      ARROW_WITH_SNAPPY: "ON"
      ARROW_WITH_ZLIB: "ON"
      ARROW_WITH_ZSTD: "ON"
      # Don't use preinstalled Boost by empty BOOST_ROOT and
      # -DBoost_NO_BOOST_CMAKE=ON
      BOOST_ROOT: ""
      ARROW_CMAKE_ARGS: >-
        -DARROW_PACKAGE_PREFIX=/${{ matrix.msystem_lower}}
        -DBoost_NO_BOOST_CMAKE=ON
        -DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON
      # We can't use unity build because we don't have enough memory on
      # GitHub Actions.
      # CMAKE_UNITY_BUILD: ON
      GTest_SOURCE: BUNDLED
    steps:
      - name: Disable Crash Dialogs
        # Sets the Windows Error Reporting DontShowUI registry value to 1.
        run: |
          reg add `
            "HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" `
            /v DontShowUI `
            /t REG_DWORD `
            /d 1 `
            /f
      - name: Checkout Arrow
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: recursive
      - uses: msys2/setup-msys2@v2
        with:
          msystem: ${{ matrix.msystem_upper }}
          update: true
      - name: Setup MSYS2
        shell: msys2 {0}
        run: ci/scripts/msys2_setup.sh cpp
      - name: Cache ccache
        uses: actions/cache@v3
        with:
          path: ccache
          key: cpp-ccache-${{ matrix.msystem_lower}}-${{ hashFiles('cpp/**') }}
          restore-keys: cpp-ccache-${{ matrix.msystem_lower}}-
      - name: Build
        shell: msys2 {0}
        run: |
          export CMAKE_BUILD_PARALLEL_LEVEL=$NUMBER_OF_PROCESSORS
          ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)/build"
      - name: Download Timezone Database
        shell: bash
        run: ci/scripts/download_tz_database.sh
      - name: Download MinIO
        shell: msys2 {0}
        run: |
          mkdir -p /usr/local/bin
          wget \
            --output-document /usr/local/bin/minio.exe \
            https://dl.min.io/server/minio/release/windows-amd64/archive/minio.RELEASE.2022-05-26T05-48-41Z
          chmod +x /usr/local/bin/minio.exe
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted: an unquoted 3.9 is a YAML float, not a version string.
          python-version: "3.9"
      - name: Install Google Cloud Storage Testbench
        shell: bash
        run: |
          ci/scripts/install_gcs_testbench.sh default
          # Record the directory of python3.exe (as a Windows path) for the
          # Test step. Substitutions are quoted so that a path containing
          # spaces is not word-split.
          echo "PYTHON_BIN_DIR=$(cygpath --windows "$(dirname "$(which python3.exe)")")" >> "$GITHUB_ENV"
      - name: Test
        shell: msys2 {0}
        run: |
          # Prepend the Python directory recorded by the previous step,
          # converted back to a Unix-style path.
          PATH="$(cygpath --unix "${PYTHON_BIN_DIR}"):${PATH}"
          ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build"