diff --git a/polars/polars-core/src/fmt.rs b/polars/polars-core/src/fmt.rs index fe7fde8ea0f0..63693b4d6d99 100644 --- a/polars/polars-core/src/fmt.rs +++ b/polars/polars-core/src/fmt.rs @@ -1,8 +1,8 @@ #[cfg(any(feature = "fmt", feature = "fmt_no_tty"))] use std::borrow::Cow; -use std::fmt; use std::fmt::{Debug, Display, Formatter}; use std::sync::atomic::{AtomicU8, Ordering}; +use std::{fmt, str}; #[cfg(any( feature = "dtype-date", @@ -52,7 +52,7 @@ macro_rules! format_array { write!( $f, "shape: ({},)\n{}: '{}' [{}]\n[\n", - $a.len(), + fmt_uint(&$a.len()), $array_type, $name, $dtype @@ -133,7 +133,7 @@ fn format_object_array( write!( f, "shape: ({},)\n{}: '{}' [o][{}]\n[\n", - object.len(), + fmt_uint(&object.len()), array_type, name, inner_type @@ -364,6 +364,24 @@ fn env_is_true(varname: &str) -> bool { std::env::var(varname).as_deref().unwrap_or("0") == "1" } +fn fmt_uint(num: &usize) -> String { + // Return a string with thousands separated by _ + // e.g. 1_000_000 + num.to_string() + .as_bytes() + .rchunks(3) + .rev() + .map(str::from_utf8) + .collect::<Result<Vec<&str>, _>>() + .unwrap() + .join("_") // separator +} + +fn fmt_df_shape((shape0, shape1): &(usize, usize)) -> String { + // e.g. 
(1_000_000, 4_000) + format!("({}, {})", fmt_uint(shape0), fmt_uint(shape1)) +} + impl Display for DataFrame { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { #[cfg(any(feature = "fmt", feature = "fmt_no_tty"))] @@ -572,12 +590,14 @@ impl Display for DataFrame { } // establish 'shape' information (above/below/hidden) + let shape_str = fmt_df_shape(&self.shape()); + if env_is_true(FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION) { write!(f, "{table}")?; } else if env_is_true(FMT_TABLE_DATAFRAME_SHAPE_BELOW) { - write!(f, "{table}\nshape: {:?}", self.shape())?; + write!(f, "{table}\nshape: {}", shape_str)?; } else { - write!(f, "shape: {:?}\n{}", self.shape(), table)?; + write!(f, "shape: {}\n{}", shape_str, table)?; } } diff --git a/polars/polars-sql/README.md b/polars/polars-sql/README.md index 5b45ee8e8da7..a2dca07b1e50 100644 --- a/polars/polars-sql/README.md +++ b/polars/polars-sql/README.md @@ -20,7 +20,7 @@ quit \q Exit >> register taxis /home/ritchie46/example/csv-benchmark/yellow_tripdata_2010-01.parquet Added dataframe "taxis" from file /home/ritchie46/example/csv-benchmark/yellow_tripdata_2010-01.parquet -shape: (14863778, 18) +shape: (14_863_778, 18) ┌───────────┬─────────────────────┬─────────────────────┬─────────────────┬─────┬─────────┬────────────┬──────────────┬──────────────┐ │ vendor_id ┆ pickup_datetime ┆ dropoff_datetime ┆ passenger_count ┆ ... ┆ mta_tax ┆ tip_amount ┆ tolls_amount ┆ total_amount │ │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │ diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index b7033b583d96..fcdf2a6dfe4a 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -1022,7 +1022,7 @@ def microsecond(self) -> Expr: ... pl.col("date").dt.microsecond().alias("microseconds"), ... ] ... ) - shape: (1001, 2) + shape: (1_001, 2) ┌─────────────────────────┬──────────────┐ │ date ┆ microseconds │ │ --- ┆ --- │ @@ -1492,7 +1492,7 @@ def milliseconds(self) -> Expr: ... 
pl.col("date").diff().dt.milliseconds().alias("milliseconds_diff"), ... ] ... ) - shape: (1001, 2) + shape: (1_001, 2) ┌─────────────────────────┬───────────────────┐ │ date ┆ milliseconds_diff │ │ --- ┆ --- │ @@ -1536,7 +1536,7 @@ def microseconds(self) -> Expr: ... pl.col("date").diff().dt.microseconds().alias("microseconds_diff"), ... ] ... ) - shape: (1001, 2) + shape: (1_001, 2) ┌─────────────────────────┬───────────────────┐ │ date ┆ microseconds_diff │ │ --- ┆ --- │ @@ -1580,7 +1580,7 @@ def nanoseconds(self) -> Expr: ... pl.col("date").diff().dt.nanoseconds().alias("nanoseconds_diff"), ... ] ... ) - shape: (1001, 2) + shape: (1_001, 2) ┌─────────────────────────┬──────────────────┐ │ date ┆ nanoseconds_diff │ │ --- ┆ --- │ diff --git a/py-polars/tests/unit/test_cfg.py b/py-polars/tests/unit/test_cfg.py index bdf8c3aab1b0..26ded55a4761 100644 --- a/py-polars/tests/unit/test_cfg.py +++ b/py-polars/tests/unit/test_cfg.py @@ -386,6 +386,81 @@ def test_shape_below_table_and_inlined_dtype() -> None: ) +def test_shape_format_for_big_numbers() -> None: + df = pl.DataFrame({"a": range(1, 1001), "b": range(1001, 1001 + 1000)}) + + pl.Config.set_tbl_column_data_type_inline(True).set_tbl_dataframe_shape_below(True) + pl.Config.set_tbl_formatting("UTF8_FULL", rounded_corners=True) + assert ( + str(df) == "" + "╭─────────┬─────────╮\n" + "│ a (i64) ┆ b (i64) │\n" + "╞═════════╪═════════╡\n" + "│ 1 ┆ 1001 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 2 ┆ 1002 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 3 ┆ 1003 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 4 ┆ 1004 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ … ┆ … │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 997 ┆ 1997 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 998 ┆ 1998 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 999 ┆ 1999 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 1000 ┆ 2000 │\n" + "╰─────────┴─────────╯\n" + "shape: (1_000, 2)" + ) + + pl.Config.set_tbl_column_data_type_inline(True).set_tbl_dataframe_shape_below(False) + assert ( + str(df) == "shape: (1_000, 2)\n" + 
"╭─────────┬─────────╮\n" + "│ a (i64) ┆ b (i64) │\n" + "╞═════════╪═════════╡\n" + "│ 1 ┆ 1001 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 2 ┆ 1002 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 3 ┆ 1003 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 4 ┆ 1004 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ … ┆ … │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 997 ┆ 1997 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 998 ┆ 1998 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 999 ┆ 1999 │\n" + "├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤\n" + "│ 1000 ┆ 2000 │\n" + "╰─────────┴─────────╯" + ) + + pl.Config.set_tbl_rows(0) + ser = pl.Series("ser", range(1000)) + assert str(ser) == "shape: (1_000,)\n" "Series: 'ser' [i64]\n" "[\n" "\t…\n" "]" + + pl.Config.set_tbl_rows(1) + pl.Config.set_tbl_cols(1) + df = pl.DataFrame({str(col_num): 1 for col_num in range(1000)}) + + assert ( + str(df) == "shape: (1, 1_000)\n" + "╭─────────┬───╮\n" + "│ 0 (i64) ┆ … │\n" + "╞═════════╪═══╡\n" + "│ 1 ┆ … │\n" + "╰─────────┴───╯" + ) + + def test_string_cache() -> None: df1 = pl.DataFrame({"a": ["foo", "bar", "ham"], "b": [1, 2, 3]}) df2 = pl.DataFrame({"a": ["foo", "spam", "eggs"], "c": [3, 2, 2]})