From 073329fc9b579ff06f0ad41bd507b6e268a62430 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Wed, 22 Mar 2023 22:48:48 +0400 Subject: [PATCH] feat(python,rust): include `set_fmt_float` value in `Config` load/save state (#7696) --- polars/polars-core/src/fmt.rs | 2 +- py-polars/polars/config.py | 161 ++++++++++++++++++------------- py-polars/polars/type_aliases.py | 1 + py-polars/src/lib.rs | 11 +++ py-polars/src/series.rs | 2 +- py-polars/tests/unit/test_cfg.py | 4 + 6 files changed, 112 insertions(+), 69 deletions(-) diff --git a/polars/polars-core/src/fmt.rs b/polars/polars-core/src/fmt.rs index 948bb10ecc59..fe7fde8ea0f0 100644 --- a/polars/polars-core/src/fmt.rs +++ b/polars/polars-core/src/fmt.rs @@ -35,7 +35,7 @@ pub enum FloatFmt { } static FLOAT_FMT: AtomicU8 = AtomicU8::new(FloatFmt::Mixed as u8); -fn get_float_fmt() -> FloatFmt { +pub fn get_float_fmt() -> FloatFmt { match FLOAT_FMT.load(Ordering::Relaxed) { 0 => FloatFmt::Mixed, 1 => FloatFmt::Full, diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index 7420ba183116..76471f9f983c 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -6,23 +6,34 @@ from polars.dependencies import json -with contextlib.suppress(ImportError): # Module not available when building docs - from polars.polars import set_float_fmt as _set_float_fmt +# dummy func required (so docs build) +def _get_float_fmt() -> str: + return "n/a" + + +# note: module not available when building docs +with contextlib.suppress(ImportError): + from polars.polars import get_float_fmt as _get_float_fmt # type: ignore[no-redef] + from polars.polars import set_float_fmt as _set_float_fmt if TYPE_CHECKING: import sys from types import TracebackType + from polars.type_aliases import FloatFmt + if sys.version_info >= (3, 8): from typing import Literal else: from typing_extensions import Literal + # note: register all Config-specific environment variable names here; need to constrain # which 'POLARS_' 
environment variables are recognised, as there are other lower-level # and/or experimental settings that should not be saved or reset with the Config vars. -POLARS_CFG_ENV_VARS = { +_POLARS_CFG_ENV_VARS = { + "POLARS_ACTIVATE_DECIMAL", "POLARS_AUTO_STRUCTIFY", "POLARS_FMT_MAX_COLS", "POLARS_FMT_MAX_ROWS", @@ -36,12 +47,15 @@ "POLARS_FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION", "POLARS_FMT_TABLE_INLINE_COLUMN_DATA_TYPE", "POLARS_FMT_TABLE_ROUNDED_CORNERS", + "POLARS_STREAMING_CHUNK_SIZE", "POLARS_TABLE_WIDTH", "POLARS_VERBOSE", - "POLARS_ACTIVATE_DECIMAL", - "POLARS_STREAMING_CHUNK_SIZE", } +# vars that set the rust env directly should declare themselves here as the Config +# method name paired with a callable that returns the current state of that value: +_POLARS_CFG_DIRECT_VARS = {"set_fmt_float": _get_float_fmt} + class Config: """ @@ -87,9 +101,9 @@ def load(cls, cfg: str) -> type[Config]: """ options = json.loads(cfg) os.environ.update(options.get("environment", {})) - for flag, value in options.get("local", {}).items(): - if hasattr(cls, flag): - setattr(cls, flag, value) + for cfg_methodname, value in options.get("direct", {}).items(): + if hasattr(cls, cfg_methodname): + getattr(cls, cfg_methodname)(value) return cls @classmethod @@ -107,7 +121,7 @@ def restore_defaults(cls) -> type[Config]: >>> cfg = pl.Config.restore_defaults() # doctest: +SKIP """ - for var in POLARS_CFG_ENV_VARS: + for var in _POLARS_CFG_ENV_VARS: os.environ.pop(var, None) cls.set_fmt_float() return cls @@ -124,27 +138,35 @@ def save(cls) -> str: """ environment_vars = { key: os.environ[key] - for key in sorted(POLARS_CFG_ENV_VARS) + for key in sorted(_POLARS_CFG_ENV_VARS) if (key in os.environ) } - # note: 'local' vars are unused; preserved here for - # backwards-compatibility with previously-saved configs + direct_vars = { + cfg_methodname: get_value() + for cfg_methodname, get_value in _POLARS_CFG_DIRECT_VARS.items() + } return json.dumps( - {"environment": environment_vars, "local": 
{}}, + {"environment": environment_vars, "direct": direct_vars}, separators=(",", ":"), ) @classmethod - def state(cls, if_set: bool = False) -> dict[str, str | None]: + def state( + cls, if_set: bool = False, env_only: bool = False + ) -> dict[str, str | None]: """ - Show the current state of all Config environment variables as a dict. + Show the current state of all Config variables as a dict. Parameters ---------- if_set : bool by default this will show the state of all ``Config`` environment variables. change this to ``True`` to restrict the returned dictionary to include only - those that _have_ been set to a specific value. + those that have been set to a specific value. + + env_only : bool + include only Config environment variables in the output; some options (such + as "set_fmt_float") are set directly, not via an environment variable. Examples -------- @@ -152,11 +174,31 @@ def state(cls, if_set: bool = False) -> dict[str, str | None]: >>> all_state = pl.Config.state() """ - return { + config_state = { var: os.environ.get(var) - for var in sorted(POLARS_CFG_ENV_VARS) + for var in sorted(_POLARS_CFG_ENV_VARS) if not if_set or (os.environ.get(var) is not None) } + if not env_only: + for cfg_methodname, get_value in _POLARS_CFG_DIRECT_VARS.items(): + config_state[cfg_methodname] = get_value() + + return config_state + + @classmethod + def activate_decimals(cls) -> type[Config]: + """ + Activate ``Decimal`` data types. + + This is temporary setting that will be removed later once + ``Decimal`` type stabilize. This happens without it being + considered a breaking change. + + Currently, ``Decimal`` types are in alpha stage. 
+ + """ + os.environ["POLARS_ACTIVATE_DECIMAL"] = "1" + return cls @classmethod def set_ascii_tables(cls, active: bool = True) -> type[Config]: @@ -190,6 +232,20 @@ def set_auto_structify(cls, active: bool = False) -> type[Config]: os.environ["POLARS_AUTO_STRUCTIFY"] = str(int(active)) return cls + @classmethod + def set_fmt_float(cls, fmt: FloatFmt = "mixed") -> type[Config]: + """ + Control how floating point values are displayed. + + Parameters + ---------- + fmt : {"mixed", "full"} + How to format floating point numbers + + """ + _set_float_fmt(fmt) + return cls + @classmethod def set_fmt_str_lengths(cls, n: int) -> type[Config]: """ @@ -204,6 +260,26 @@ def set_fmt_str_lengths(cls, n: int) -> type[Config]: os.environ["POLARS_FMT_STR_LEN"] = str(n) return cls + @classmethod + def set_streaming_chunk_size(cls, size: int) -> type[Config]: + """ + Overwrite chunk size used in ``streaming`` engine. + + By default, the chunk size is determined by the schema + and size of the thread pool. For some datasets (esp. + when you have large string elements) this can be too + optimistic and lead to Out of Memory errors. + + Parameters + ---------- + size + Number of rows per chunk. Every thread will process chunks + of this size. + + """ + os.environ["POLARS_STREAMING_CHUNK_SIZE"] = str(size) + return cls + @classmethod def set_tbl_cell_alignment( cls, format: Literal["LEFT", "CENTER", "RIGHT"] @@ -559,52 +635,3 @@ def set_verbose(cls, active: bool = True) -> type[Config]: """Enable additional verbose/debug logging.""" os.environ["POLARS_VERBOSE"] = str(int(active)) return cls - - @classmethod - def set_fmt_float(cls, fmt: str = "mixed") -> type[Config]: - """ - Control how floating point values are displayed. - - Parameters - ---------- - fmt : {"mixed", "full"} - How to format floating point numbers - - """ - _set_float_fmt(fmt) - return cls - - @classmethod - def activate_decimals(cls) -> type[Config]: - """ - Activate ``Decimal`` data types. 
- - This is temporary setting that will be removed later once - ``Decimal`` type stabilize. This happens without it being - considered a breaking change. - - Currently, ``Decimal`` types are in alpha stage. - - """ - os.environ["POLARS_ACTIVATE_DECIMAL"] = "1" - return cls - - @classmethod - def set_streaming_chunk_size(cls, size: int) -> type[Config]: - """ - Overwrite chunk size used in ``streaming`` engine. - - By default, the chunk size is determined by the schema - and size of the thread pool. For some datasets (esp. - when you have large string elements) this can be too - optimistic and lead to Out of Memory errors. - - Parameters - ---------- - size - Number of rows per chunk. Every thread will process chunks - of this size. - - """ - os.environ["POLARS_STREAMING_CHUNK_SIZE"] = str(size) - return cls diff --git a/py-polars/polars/type_aliases.py b/py-polars/polars/type_aliases.py index aaa9f5565521..77d69dbb0d28 100644 --- a/py-polars/polars/type_aliases.py +++ b/py-polars/polars/type_aliases.py @@ -81,6 +81,7 @@ FillNullStrategy: TypeAlias = Literal[ "forward", "backward", "min", "max", "mean", "zero", "one" ] +FloatFmt: TypeAlias = Literal["full", "mixed"] IpcCompression: TypeAlias = Literal["uncompressed", "lz4", "zstd"] NullBehavior: TypeAlias = Literal["ignore", "drop"] NullStrategy: TypeAlias = Literal["ignore", "propagate"] diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs index b4a26ef1f393..b4d6d5f02411 100644 --- a/py-polars/src/lib.rs +++ b/py-polars/src/lib.rs @@ -594,6 +594,16 @@ fn set_float_fmt(fmt: &str) -> PyResult<()> { Ok(()) } +#[pyfunction] +fn get_float_fmt() -> PyResult<String> { + use polars_core::fmt::{get_float_fmt, FloatFmt}; + let strfmt = match get_float_fmt() { + FloatFmt::Full => "full", + FloatFmt::Mixed => "mixed", + }; + Ok(strfmt.to_string()) +} + #[pymodule] fn polars(py: Python, m: &PyModule) -> PyResult<()> { m.add("ArrowError", py.get_type::()) @@ -692,5 +702,6 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> { 
m.add_wrapped(wrap_pyfunction!(get_index_type)).unwrap(); m.add_wrapped(wrap_pyfunction!(coalesce_exprs)).unwrap(); m.add_wrapped(wrap_pyfunction!(set_float_fmt)).unwrap(); + m.add_wrapped(wrap_pyfunction!(get_float_fmt)).unwrap(); Ok(()) } diff --git a/py-polars/src/series.rs b/py-polars/src/series.rs index a556de104538..f7a26a33f8ad 100644 --- a/py-polars/src/series.rs +++ b/py-polars/src/series.rs @@ -397,7 +397,7 @@ impl PySeries { if val == v_trunc { val } else { - format!("{v_trunc}...") + format!("{v_trunc}…") } } else { val diff --git a/py-polars/tests/unit/test_cfg.py b/py-polars/tests/unit/test_cfg.py index 0b72867d05a1..bdf8c3aab1b0 100644 --- a/py-polars/tests/unit/test_cfg.py +++ b/py-polars/tests/unit/test_cfg.py @@ -6,6 +6,7 @@ import pytest import polars as pl +from polars.config import _get_float_fmt from polars.testing import assert_frame_equal @@ -416,6 +417,7 @@ def test_config_load_save() -> None: # set some config options... pl.Config.set_tbl_cols(12) pl.Config.set_verbose(True) + pl.Config.set_fmt_float("full") assert os.environ.get("POLARS_VERBOSE") == "1" cfg = pl.Config.save() @@ -433,6 +435,7 @@ def test_config_load_save() -> None: # ...and confirm the saved options were set. assert os.environ.get("POLARS_FMT_MAX_COLS") == "12" assert os.environ.get("POLARS_VERBOSE") == "1" + assert _get_float_fmt() == "full" # restore all default options (unsets from env) pl.Config.restore_defaults() @@ -442,6 +445,7 @@ def test_config_load_save() -> None: assert os.environ.get("POLARS_FMT_MAX_COLS") is None assert os.environ.get("POLARS_VERBOSE") is None + assert _get_float_fmt() == "mixed" def test_config_scope() -> None: