Skip to content

Commit

Permalink
test(python): Memory usage test infrastructure, plus a test for #15098 (
Browse files Browse the repository at this point in the history
#15285)

Co-authored-by: Itamar Turner-Trauring <itamar@pythonspeed.com>
Co-authored-by: Stijn de Gooijer <stijndegooijer@gmail.com>
  • Loading branch information
3 people authored Mar 28, 2024
1 parent 0061c92 commit 9c46183
Show file tree
Hide file tree
Showing 5 changed files with 221 additions and 3 deletions.
15 changes: 13 additions & 2 deletions py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ mod gil_once_cell;
mod lazyframe;
mod lazygroupby;
mod map;
mod memory;
#[cfg(feature = "object")]
mod object;
#[cfg(feature = "object")]
Expand Down Expand Up @@ -62,16 +63,26 @@ use crate::expr::PyExpr;
use crate::functions::PyStringCacheHolder;
use crate::lazyframe::{PyInProcessQuery, PyLazyFrame};
use crate::lazygroupby::PyLazyGroupBy;
#[cfg(debug_assertions)]
use crate::memory::TracemallocAllocator;
use crate::series::PySeries;
#[cfg(feature = "sql")]
use crate::sql::PySQLContext;

// On Windows tracemalloc does work. However, we build abi3 wheels, and the
// relevant C APIs are not part of the limited stable CPython API. As a result,
// linking breaks on Windows if we use tracemalloc C APIs. So we only use this
// on Unix for now.
#[global_allocator]
#[cfg(all(target_family = "unix", not(use_mimalloc)))]
#[cfg(all(target_family = "unix", debug_assertions))]
static ALLOC: TracemallocAllocator<Jemalloc> = TracemallocAllocator::new(Jemalloc);

#[global_allocator]
#[cfg(all(target_family = "unix", not(use_mimalloc), not(debug_assertions)))]
static ALLOC: Jemalloc = Jemalloc;

#[global_allocator]
#[cfg(any(not(target_family = "unix"), use_mimalloc))]
#[cfg(all(any(not(target_family = "unix"), use_mimalloc), not(debug_assertions)))]
static ALLOC: MiMalloc = MiMalloc;

#[pymodule]
Expand Down
75 changes: 75 additions & 0 deletions py-polars/src/memory.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
//! Utilities for dealing with memory allocations.

use std::alloc::GlobalAlloc;

use libc::{c_int, c_uint, size_t, uintptr_t};

// When debug_assertions is enabled, use Python's tracemalloc to track memory
// allocations. This is a useful feature for production use too, but has a
// potential performance impact and so would need additional benchmarking. In
// addition, these APIs are not part of the limited Python ABI Polars uses,
// though they are unchanged between 3.7 and 3.12.
// Foreign declarations for the tracemalloc functions exported by the Python
// runtime. They are compiled on every target except Windows.
#[cfg(not(target_os = "windows"))]
extern "C" {
/// Reports to tracemalloc that `size` bytes at address `ptr` are in use,
/// filed under `domain`.
///
/// NOTE(review): per the CPython C API docs this returns 0 on success, -1 on
/// error and -2 when tracemalloc is not tracing — confirm for the Python
/// versions we target.
fn PyTraceMalloc_Track(domain: c_uint, ptr: uintptr_t, size: size_t) -> c_int;
/// Tells tracemalloc to forget the block at address `ptr` in `domain`.
///
/// NOTE(review): per the CPython C API docs this does nothing if the block
/// was not tracked and returns -2 when tracemalloc is not tracing — confirm.
fn PyTraceMalloc_Untrack(domain: c_uint, ptr: uintptr_t) -> c_int;
}

// Windows has issues linking to the tracemalloc APIs, so the functionality is
// disabled. We have fake implementations just to make sure we don't have
// issues building.
/// Windows-only stand-in for the tracemalloc tracking call: every argument is
/// ignored and nothing is recorded.
///
/// Always returns -2.
/// NOTE(review): -2 presumably mirrors CPython's "tracemalloc is disabled"
/// return code — confirm against the C API docs.
#[cfg(target_os = "windows")]
#[allow(non_snake_case)]
fn PyTraceMalloc_Track(_domain: c_uint, _ptr: uintptr_t, _size: size_t) -> c_int {
-2
}

/// Windows-only stand-in for the tracemalloc untracking call: every argument
/// is ignored and nothing is removed.
///
/// Always returns -2.
/// NOTE(review): -2 presumably mirrors CPython's "tracemalloc is disabled"
/// return code — confirm against the C API docs.
#[cfg(target_os = "windows")]
#[allow(non_snake_case)]
fn PyTraceMalloc_Untrack(_domain: c_uint, _ptr: uintptr_t) -> c_int {
-2
}

/// Allocations require a domain to identify them when registering with
/// tracemalloc. Following NumPy's lead, we just pick a random constant that is
/// unlikely to clash with anyone else.
///
/// This value is passed as the `domain` argument of every
/// `PyTraceMalloc_Track` / `PyTraceMalloc_Untrack` call made by
/// `TracemallocAllocator`.
const TRACEMALLOC_DOMAIN: c_uint = 36740582;

/// Allocator adapter that delegates to an inner allocator and reports every
/// allocation and free to Python's `tracemalloc`.
///
/// On Windows the reporting calls are no-op stubs, so only the delegation
/// remains.
pub struct TracemallocAllocator<A>
where
    A: GlobalAlloc,
{
    /// The allocator that actually services the requests.
    wrapped_alloc: A,
}

impl<A> TracemallocAllocator<A>
where
    A: GlobalAlloc,
{
    /// Build an adapter around `wrapped_alloc`.
    ///
    /// This is a `const fn`, so the result can initialise a `static`, as
    /// required for a `#[global_allocator]`.
    pub const fn new(wrapped_alloc: A) -> Self {
        TracemallocAllocator { wrapped_alloc }
    }
}

/// Forwards every request to the wrapped allocator and mirrors the outcome
/// into tracemalloc under `TRACEMALLOC_DOMAIN`.
///
/// The return codes of the tracemalloc calls are deliberately ignored:
/// bookkeeping is best-effort and must never make an allocation fail.
///
/// Failed allocations (null results) are never registered, so tracemalloc does
/// not accumulate phantom traces at address 0.
// SAFETY: all memory management is delegated unchanged to `wrapped_alloc`,
// which itself upholds the `GlobalAlloc` contract; the tracemalloc calls only
// record addresses and sizes and never touch the memory.
unsafe impl<A: GlobalAlloc> GlobalAlloc for TracemallocAllocator<A> {
    /// Allocate via the wrapped allocator and register the new block.
    unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
        let result = self.wrapped_alloc.alloc(layout);
        if !result.is_null() {
            PyTraceMalloc_Track(TRACEMALLOC_DOMAIN, result as uintptr_t, layout.size());
        }
        result
    }

    /// Unregister the block, then free it via the wrapped allocator.
    unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
        // Untrack before freeing: once the memory is released another thread
        // may receive (and track) the same address.
        PyTraceMalloc_Untrack(TRACEMALLOC_DOMAIN, ptr as uintptr_t);
        self.wrapped_alloc.dealloc(ptr, layout)
    }

    /// Allocate zeroed memory via the wrapped allocator and register it.
    unsafe fn alloc_zeroed(&self, layout: std::alloc::Layout) -> *mut u8 {
        let result = self.wrapped_alloc.alloc_zeroed(layout);
        if !result.is_null() {
            PyTraceMalloc_Track(TRACEMALLOC_DOMAIN, result as uintptr_t, layout.size());
        }
        result
    }

    /// Resize a block via the wrapped allocator, keeping tracemalloc in sync
    /// whether the block moves, stays in place, or the resize fails.
    unsafe fn realloc(&self, ptr: *mut u8, layout: std::alloc::Layout, new_size: usize) -> *mut u8 {
        // Drop the trace for the old address first. If the block moves, the
        // old address is freed by the wrapped allocator and would otherwise
        // stay registered forever, inflating the reported usage. Doing this
        // before the call (as in `dealloc`) avoids racing with another thread
        // that is handed the freed address.
        PyTraceMalloc_Untrack(TRACEMALLOC_DOMAIN, ptr as uintptr_t);
        let result = self.wrapped_alloc.realloc(ptr, layout, new_size);
        if result.is_null() {
            // The resize failed, so the original block is still live with its
            // original size: restore its trace.
            PyTraceMalloc_Track(TRACEMALLOC_DOMAIN, ptr as uintptr_t, layout.size());
        } else {
            PyTraceMalloc_Track(TRACEMALLOC_DOMAIN, result as uintptr_t, new_size);
        }
        result
    }
}
64 changes: 63 additions & 1 deletion py-polars/tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from __future__ import annotations

import gc
import random
import string
from typing import List, cast
import sys
import tracemalloc
from typing import Any, Generator, List, cast

import numpy as np
import pytest
Expand Down Expand Up @@ -138,3 +141,62 @@ def iso8601_tz_aware_format_datetime(request: pytest.FixtureRequest) -> list[str
@pytest.fixture(params=ISO8601_FORMATS_DATE)
def iso8601_format_date(request: pytest.FixtureRequest) -> list[str]:
return cast(List[str], request.param)


class MemoryUsage:
    """
    Measure current and peak memory usage via ``tracemalloc``.

    Memory from PyArrow is not tracked at the moment.
    """

    def reset_tracking(self) -> None:
        """Restart tracing so that current and peak usage begin again near zero."""
        gc.collect()
        # Stopping and starting again throws away everything traced so far.
        tracemalloc.stop()
        tracemalloc.start()
        assert self.get_peak() < 100_000

    def get_current(self) -> int:
        """
        Return currently allocated memory, in bytes.

        Only allocations made since this object was created or since the last
        ``reset_tracking()`` call (whichever is later) are counted.
        """
        current_bytes, _peak_bytes = tracemalloc.get_traced_memory()
        return current_bytes

    def get_peak(self) -> int:
        """
        Return peak allocated memory, in bytes.

        The peak is measured since this object was created or since the last
        ``reset_tracking()`` call (whichever is later).
        """
        _current_bytes, peak_bytes = tracemalloc.get_traced_memory()
        return peak_bytes


@pytest.fixture()
def memory_usage_without_pyarrow() -> Generator[MemoryUsage, Any, Any]:
    """
    Yield a ``MemoryUsage`` object while ``tracemalloc`` is tracing.

    Not thread-safe: there should only be one instance of MemoryUsage at any
    given time.

    Memory usage from PyArrow is not tracked.

    The requesting test is skipped on non-debug builds and on Windows.
    """
    is_debug_build = pl.build_info()["build"]["debug"]
    if not is_debug_build:
        pytest.skip("Memory usage only available in debug/dev builds.")

    # abi3 wheels don't have the tracemalloc C APIs, which breaks linking
    # on Windows.
    on_windows = sys.platform == "win32"
    if on_windows:
        pytest.skip("Windows not supported at the moment.")

    gc.collect()
    tracemalloc.start()
    try:
        yield MemoryUsage()
    finally:
        # Always stop tracing, even if the test using the fixture failed.
        tracemalloc.stop()
31 changes: 31 additions & 0 deletions py-polars/tests/unit/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pathlib import Path

from polars.type_aliases import ParquetCompression
from tests.unit.conftest import MemoryUsage


def test_round_trip(df: pl.DataFrame) -> None:
Expand Down Expand Up @@ -788,3 +789,33 @@ def test_parquet_array_statistics(tmp_path: Path) -> None:

result = pl.scan_parquet(file_path).filter(pl.col("a") != [1, 2, 3]).collect()
assert result.to_dict(as_series=False) == {"a": [[4, 5, 6], [7, 8, 9]], "b": [2, 3]}


@pytest.mark.write_disk()
def test_read_parquet_only_loads_selected_columns_15098(
    memory_usage_without_pyarrow: MemoryUsage, tmp_path: Path
) -> None:
    """``read_parquet()`` must only load the columns that were requested."""
    tmp_path.mkdir(exist_ok=True)
    file_path = tmp_path / "multicolumn.parquet"

    # One million Int64 values: each column will be about 8MB of RAM.
    series = pl.arange(0, 1_000_000, dtype=pl.Int64, eager=True)
    df = pl.DataFrame({"a": series, "b": series})
    df.write_parquet(file_path)
    del df, series

    # Start measuring only after the data used to build the file is released.
    memory_usage_without_pyarrow.reset_tracking()

    # Only load one column:
    df = pl.read_parquet([file_path], columns=["b"], rechunk=False)
    del df

    # Only one column's worth of memory should be used; 2 columns would be
    # 16_000_000 at least, but there's some overhead.
    peak = memory_usage_without_pyarrow.get_peak()
    assert 8_000_000 < peak < 13_000_000
39 changes: 39 additions & 0 deletions py-polars/tests/unit/test_conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Tests for the testing infrastructure."""

import numpy as np

import polars as pl
from tests.unit.conftest import MemoryUsage


def test_memory_usage(memory_usage_without_pyarrow: MemoryUsage) -> None:
    """The ``memory_usage_without_pyarrow`` fixture gives somewhat accurate results."""
    usage = memory_usage_without_pyarrow
    assert usage.get_current() < 100_000
    assert usage.get_peak() < 100_000

    # Memory from Python is tracked:
    payload = b"X" * 1_300_000
    assert 1_300_000 <= usage.get_current() <= 2_000_000
    assert 1_300_000 <= usage.get_peak() <= 2_000_000
    del payload
    # Current usage drops once the bytes are released; the peak does not.
    assert usage.get_current() <= 500_000
    assert 1_300_000 <= usage.get_peak() <= 2_000_000
    usage.reset_tracking()
    assert usage.get_current() < 100_000
    assert usage.get_peak() < 100_000

    # Memory from Polars is tracked:
    frame = pl.DataFrame({"x": pl.arange(0, 1_000_000, eager=True, dtype=pl.Int64)})
    del frame
    polars_peak = usage.get_peak()
    assert 8_000_000 <= polars_peak < 8_500_000

    usage.reset_tracking()
    assert usage.get_peak() < 1_000_000

    # Memory from NumPy is tracked:
    ones = np.ones((1_400_000,), dtype=np.uint8)
    del ones
    numpy_peak = usage.get_peak()
    assert 1_400_000 < numpy_peak < 1_500_000

0 comments on commit 9c46183

Please sign in to comment.