Skip to content

Commit

Permalink
GH-45006: [CI][Python] Fix test_memory failures (#45007)
Browse files Browse the repository at this point in the history
`test_memory.py` has started failing on some builds after #44951 was merged

* GitHub Issue: #45006

Lead-authored-by: Antoine Pitrou <pitrou@free.fr>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
pitrou authored Dec 13, 2024
1 parent a1627b9 commit f9a6eda
Show file tree
Hide file tree
Showing 9 changed files with 30 additions and 28 deletions.
3 changes: 2 additions & 1 deletion ci/docker/conda-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ ENV PIPX_BASE_PYTHON=/opt/conda/bin/python3
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to
# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to
# be on the path for the tests to run.
ENV PATH=/opt/conda/envs/arrow/bin:$PATH

Expand All @@ -68,6 +68,7 @@ ENV ARROW_ACERO=ON \
ARROW_GANDIVA=ON \
ARROW_GCS=ON \
ARROW_HOME=$CONDA_PREFIX \
ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/debian-12-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ ENV ARROW_ACERO=ON \
ARROW_GANDIVA=ON \
ARROW_GCS=ON \
ARROW_HOME=/usr/local \
ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/fedora-39-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ ENV ARROW_ACERO=ON \
ARROW_GANDIVA=ON \
ARROW_GCS=ON \
ARROW_HOME=/usr/local \
ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-20.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ ENV absl_SOURCE=BUNDLED \
ARROW_HDFS=ON \
ARROW_HOME=/usr/local \
ARROW_INSTALL_NAME_RPATH=OFF \
ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-22.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ ENV absl_SOURCE=BUNDLED \
ARROW_HDFS=ON \
ARROW_HOME=/usr/local \
ARROW_INSTALL_NAME_RPATH=OFF \
ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-24.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ ENV ARROW_ACERO=ON \
ARROW_HDFS=ON \
ARROW_HOME=/usr/local \
ARROW_INSTALL_NAME_RPATH=OFF \
ARROW_JEMALLOC=ON \
ARROW_ORC=ON \
ARROW_PARQUET=ON \
ARROW_S3=ON \
Expand Down
4 changes: 2 additions & 2 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,10 @@ else
-DARROW_GCS=${ARROW_GCS:-OFF} \
-DARROW_HDFS=${ARROW_HDFS:-ON} \
-DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
-DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
-DARROW_JEMALLOC=${ARROW_JEMALLOC:-OFF} \
-DARROW_JSON=${ARROW_JSON:-ON} \
-DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
-DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
-DARROW_MIMALLOC=${ARROW_MIMALLOC:-ON} \
-DARROW_ORC=${ARROW_ORC:-OFF} \
-DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
-DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
Expand Down
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,7 @@ services:
ARROW_FLIGHT_SQL: "OFF"
ARROW_FUZZING: "ON" # Check fuzz regressions
ARROW_JEMALLOC: "OFF"
ARROW_MIMALLOC: "OFF"
ARROW_ORC: "OFF"
ARROW_S3: "OFF"
ARROW_USE_ASAN: "ON"
Expand Down Expand Up @@ -677,6 +678,7 @@ services:
ARROW_FLIGHT: "OFF"
ARROW_FLIGHT_SQL: "OFF"
ARROW_JEMALLOC: "OFF"
ARROW_MIMALLOC: "OFF"
ARROW_ORC: "OFF"
ARROW_USE_TSAN: "ON"
command: *cpp-command
Expand Down
44 changes: 19 additions & 25 deletions python/pyarrow/tests/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

import contextlib
import os
import platform
import signal
import subprocess
import sys
Expand All @@ -30,15 +29,19 @@
pytestmark = pytest.mark.processes

possible_backends = ["system", "jemalloc", "mimalloc"]
# Backends which are expected to be present in all builds of PyArrow,
# except if the user manually recompiled Arrow C++.
mandatory_backends = ["system", "mimalloc"]

should_have_jemalloc = (sys.platform == "linux" and platform.machine() == 'x86_64')
should_have_mimalloc = sys.platform == "win32"

def backend_factory(backend_name):
return getattr(pa, f"{backend_name}_memory_pool")


def supported_factories():
yield pa.default_memory_pool
for backend in pa.supported_memory_backends():
yield getattr(pa, f"{backend}_memory_pool")
for backend_name in pa.supported_memory_backends():
yield backend_factory(backend_name)


@contextlib.contextmanager
Expand Down Expand Up @@ -149,17 +152,12 @@ def check_env_var(name, expected, *, expect_warning=False):


def test_env_var():
check_env_var("system", ["system"])
if should_have_jemalloc:
check_env_var("jemalloc", ["jemalloc"])
if should_have_mimalloc:
check_env_var("mimalloc", ["mimalloc"])
for backend_name in mandatory_backends:
check_env_var(backend_name, [backend_name])
check_env_var("nonexistent", possible_backends, expect_warning=True)


def test_specific_memory_pools():
specific_pools = set()

def test_memory_pool_factories():
def check(factory, name, *, can_fail=False):
if can_fail:
try:
Expand All @@ -169,23 +167,16 @@ def check(factory, name, *, can_fail=False):
else:
pool = factory()
assert pool.backend_name == name
specific_pools.add(pool)

check(pa.system_memory_pool, "system")
check(pa.jemalloc_memory_pool, "jemalloc",
can_fail=not should_have_jemalloc)
check(pa.mimalloc_memory_pool, "mimalloc",
can_fail=not should_have_mimalloc)
for backend_name in possible_backends:
check(backend_factory(backend_name), backend_name,
can_fail=backend_name not in mandatory_backends)


def test_supported_memory_backends():
backends = pa.supported_memory_backends()

assert "system" in backends
if should_have_jemalloc:
assert "jemalloc" in backends
if should_have_mimalloc:
assert "mimalloc" in backends
assert set(backends) >= set(mandatory_backends)
assert set(backends) <= set(possible_backends)


def run_debug_memory_pool(pool_factory, env_value):
Expand Down Expand Up @@ -246,6 +237,9 @@ def test_debug_memory_pool_warn(pool_factory):


def check_debug_memory_pool_disabled(pool_factory, env_value, msg):
if sys.maxsize < 2**32:
# GH-45011: mimalloc may print warnings in this test on 32-bit Linux, ignore.
pytest.skip("Test may fail on 32-bit platforms")
res = run_debug_memory_pool(pool_factory.__name__, env_value)
# The subprocess either returned successfully or was killed by a signal
# (due to writing out of bounds), depending on the underlying allocator.
Expand Down

0 comments on commit f9a6eda

Please sign in to comment.