-
-
Notifications
You must be signed in to change notification settings - Fork 2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
test(python): Memory usage test infrastructure, plus a test for #15098 (#15285)
Co-authored-by: Itamar Turner-Trauring <itamar@pythonspeed.com>
Co-authored-by: Stijn de Gooijer <stijndegooijer@gmail.com>
- Loading branch information
1 parent
0061c92
commit 9c46183
Showing
5 changed files
with
221 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
//! Utilities for dealing with memory allocations. | ||
|
||
use std::alloc::GlobalAlloc; | ||
|
||
use libc::{c_int, c_uint, size_t, uintptr_t}; | ||
|
||
// Bindings to CPython's tracemalloc C API. When `debug_assertions` is enabled
// they are used to report Rust-side allocations to Python's `tracemalloc`.
// Doing the same in production builds would be useful too, but it may cost
// performance and would have to be benchmarked first. These functions are not
// part of the limited Python ABI Polars uses; their signatures are, however,
// unchanged between Python 3.7 and 3.12.
#[cfg(not(target_os = "windows"))]
extern "C" {
    /// Registers the block at address `ptr` of `size` bytes under `domain`.
    /// CPython documents the result as `0` on success, `-1` on error and `-2`
    /// when tracemalloc is disabled.
    fn PyTraceMalloc_Track(domain: c_uint, ptr: uintptr_t, size: size_t) -> c_int;

    /// Removes the trace for the block at address `ptr` in `domain`. CPython
    /// documents this as a no-op for untracked blocks, returning `-2` when
    /// tracemalloc is disabled.
    fn PyTraceMalloc_Untrack(domain: c_uint, ptr: uintptr_t) -> c_int;
}
|
||
/// Windows stand-in for CPython's `PyTraceMalloc_Track`.
///
/// Linking to the tracemalloc APIs is problematic on Windows, so tracking is
/// switched off there; this fake exists only so the crate still builds. All
/// arguments are ignored and the call always yields `-2`, the value CPython
/// documents for "tracemalloc is disabled".
#[cfg(target_os = "windows")]
#[allow(non_snake_case)] // The name has to match the C function it replaces.
fn PyTraceMalloc_Track(_domain: c_uint, _ptr: uintptr_t, _size: size_t) -> c_int {
    const TRACEMALLOC_DISABLED: c_int = -2;
    TRACEMALLOC_DISABLED
}
|
||
/// Windows stand-in for CPython's `PyTraceMalloc_Untrack`.
///
/// Tracking is not available on Windows builds, so this fake ignores its
/// arguments and always yields `-2`, the value CPython documents for
/// "tracemalloc is disabled".
#[cfg(target_os = "windows")]
#[allow(non_snake_case)] // The name has to match the C function it replaces.
fn PyTraceMalloc_Untrack(_domain: c_uint, _ptr: uintptr_t) -> c_int {
    const TRACEMALLOC_DISABLED: c_int = -2;
    TRACEMALLOC_DISABLED
}
|
||
/// Domain identifier passed to tracemalloc with every registration.
///
/// tracemalloc needs a domain to tell allocations from different sources
/// apart. As NumPy does, we simply use an arbitrary constant that is unlikely
/// to collide with a domain chosen by anyone else.
const TRACEMALLOC_DOMAIN: c_uint = 36_740_582;
|
||
/// Allocator adapter that wraps another [`GlobalAlloc`] and registers its
/// allocations and frees with Python's `tracemalloc`.
///
/// The registration functionality is disabled on Windows.
pub struct TracemallocAllocator<A>
where
    A: GlobalAlloc,
{
    /// The allocator that actually services every request.
    wrapped_alloc: A,
}
|
||
impl<A: GlobalAlloc> TracemallocAllocator<A> { | ||
/// Wrap the allocator such that allocations are registered with | ||
/// tracemalloc. | ||
pub const fn new(wrapped_alloc: A) -> Self { | ||
Self { wrapped_alloc } | ||
} | ||
} | ||
|
||
unsafe impl<A: GlobalAlloc> GlobalAlloc for TracemallocAllocator<A> { | ||
unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 { | ||
let result = self.wrapped_alloc.alloc(layout); | ||
PyTraceMalloc_Track(TRACEMALLOC_DOMAIN, result as uintptr_t, layout.size()); | ||
result | ||
} | ||
|
||
unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) { | ||
PyTraceMalloc_Untrack(TRACEMALLOC_DOMAIN, ptr as uintptr_t); | ||
self.wrapped_alloc.dealloc(ptr, layout) | ||
} | ||
|
||
unsafe fn alloc_zeroed(&self, layout: std::alloc::Layout) -> *mut u8 { | ||
let result = self.wrapped_alloc.alloc_zeroed(layout); | ||
PyTraceMalloc_Track(TRACEMALLOC_DOMAIN, result as uintptr_t, layout.size()); | ||
result | ||
} | ||
|
||
unsafe fn realloc(&self, ptr: *mut u8, layout: std::alloc::Layout, new_size: usize) -> *mut u8 { | ||
let result = self.wrapped_alloc.realloc(ptr, layout, new_size); | ||
PyTraceMalloc_Track(TRACEMALLOC_DOMAIN, result as uintptr_t, new_size); | ||
result | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
"""Tests for the testing infrastructure.""" | ||
|
||
import numpy as np | ||
|
||
import polars as pl | ||
from tests.unit.conftest import MemoryUsage | ||
|
||
|
||
def test_memory_usage(memory_usage_without_pyarrow: MemoryUsage) -> None:
    """Check that the ``memory_usage`` fixture reports roughly correct numbers."""
    usage = memory_usage_without_pyarrow
    # Nothing sizeable has been allocated yet.
    assert usage.get_current() < 100_000
    assert usage.get_peak() < 100_000

    # Allocations made by the Python interpreter are counted:
    payload = b"X" * 1_300_000
    assert 1_300_000 <= usage.get_current() <= 2_000_000
    assert 1_300_000 <= usage.get_peak() <= 2_000_000
    del payload
    # Current usage drops once the bytes object is gone; the peak does not.
    assert usage.get_current() <= 500_000
    assert 1_300_000 <= usage.get_peak() <= 2_000_000
    usage.reset_tracking()
    assert usage.get_current() < 100_000
    assert usage.get_peak() < 100_000

    # Allocations made by Polars are counted (1M Int64 values = 8 MB):
    frame = pl.DataFrame({"x": pl.arange(0, 1_000_000, eager=True, dtype=pl.Int64)})
    del frame
    polars_peak = usage.get_peak()
    assert 8_000_000 <= polars_peak < 8_500_000

    usage.reset_tracking()
    assert usage.get_peak() < 1_000_000

    # Allocations made by NumPy are counted (1.4M uint8 values = 1.4 MB):
    ones = np.ones((1_400_000,), dtype=np.uint8)
    del ones
    numpy_peak = usage.get_peak()
    assert 1_400_000 < numpy_peak < 1_500_000