Skip to content

Commit

Permalink
fix(python, rust): properly interpret FMT_MAX_ROWS - remove arbitrary minimum, fix Series formatting (pola-rs#5281)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored and zundertj committed Jan 7, 2023
1 parent 5d02f50 commit ca6b2a2
Show file tree
Hide file tree
Showing 7 changed files with 277 additions and 345 deletions.
13 changes: 12 additions & 1 deletion polars/polars-core/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
// Formatting environment variables (typically referenced/set from the Python-side Config object).
//
// Every constant below holds the *name* of an environment variable, not its value; the
// formatting code reads the value with `std::env::var` when something is displayed.
// Each name is declared exactly once and the list is kept in alphabetical order.

/// Column limit for `DataFrame` display.
// NOTE(review): the parsing site is only partly visible here; it appears to follow the same
// "negative means no limit" convention as `FMT_MAX_ROWS` -- confirm in `fmt.rs`.
pub(crate) const FMT_MAX_COLS: &str = "POLARS_FMT_MAX_COLS";
/// Row limit for `DataFrame` display. Parsed as an integer; a negative value
/// selects every row, and an unset or unparsable value falls back to a built-in default.
pub(crate) const FMT_MAX_ROWS: &str = "POLARS_FMT_MAX_ROWS";
/// String truncation length applied to displayed values and column names.
pub(crate) const FMT_STR_LEN: &str = "POLARS_FMT_STR_LEN";
/// Cell alignment preset name for table output (e.g. `"RIGHT"`).
pub(crate) const FMT_TABLE_CELL_ALIGNMENT: &str = "POLARS_FMT_TABLE_CELL_ALIGNMENT";
/// When true, the `shape: (..)` line is written below the table instead of above it.
pub(crate) const FMT_TABLE_DATAFRAME_SHAPE_BELOW: &str = "POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW";
/// Name of the table border preset (e.g. `"UTF8_FULL"`, `"ASCII_MARKDOWN"`).
pub(crate) const FMT_TABLE_FORMATTING: &str = "POLARS_FMT_TABLE_FORMATTING";
/// When true, column data types are omitted from the table header.
pub(crate) const FMT_TABLE_HIDE_COLUMN_DATA_TYPES: &str = "POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES";
/// When true, column names are omitted from the table header.
pub(crate) const FMT_TABLE_HIDE_COLUMN_NAMES: &str = "POLARS_FMT_TABLE_HIDE_COLUMN_NAMES";
/// When true, the `---` separator between column name and data type is omitted.
pub(crate) const FMT_TABLE_HIDE_COLUMN_SEPARATOR: &str = "POLARS_FMT_TABLE_HIDE_COLUMN_SEPARATOR";
/// When true, the `shape: (..)` line is not written at all.
pub(crate) const FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION: &str =
    "POLARS_FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION";
/// When true, the data type is rendered on the same line as the column name, in parentheses.
pub(crate) const FMT_TABLE_INLINE_COLUMN_DATA_TYPE: &str =
    "POLARS_FMT_TABLE_INLINE_COLUMN_DATA_TYPE";
123 changes: 61 additions & 62 deletions polars/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use comfy_table::presets::*;
use comfy_table::*;
use num::{Num, NumCast};

use crate::config::{FMT_MAX_COLS, FMT_MAX_ROWS, FMT_STR_LEN};
use crate::config::*;
use crate::prelude::*;

const LIMIT: usize = 25;
Expand Down Expand Up @@ -47,18 +47,9 @@ macro_rules! format_array {
.as_deref()
.unwrap_or("")
.parse()
.map_or(LIMIT, |n: i64| {
if n < 0 {
$a.len()
} else if n < 2 {
2
} else {
n as usize
}
});
.map_or(LIMIT, |n: i64| if n < 0 { $a.len() } else { n as usize });
std::cmp::min(limit, $a.len())
};

let write_fn = |v, f: &mut Formatter| {
if truncate {
let v = format!("{}", v);
Expand All @@ -78,16 +69,19 @@ macro_rules! format_array {
};
Ok(())
};

if limit < $a.len() {
for i in 0..limit / 2 {
let v = $a.get_any_value(i);
write_fn(v, $f)?;
if limit > 0 {
for i in 0..std::cmp::max((limit / 2), 1) {
let v = $a.get_any_value(i);
write_fn(v, $f)?;
}
}
write!($f, "\t...\n")?;
for i in (0..limit / 2).rev() {
let v = $a.get_any_value($a.len() - i - 1);
write_fn(v, $f)?;
if limit > 1 {
for i in ($a.len() - (limit + 1) / 2)..$a.len() {
let v = $a.get_any_value(i);
write_fn(v, $f)?;
}
}
} else {
for i in 0..limit {
Expand Down Expand Up @@ -350,7 +344,6 @@ impl Display for DataFrame {
self.columns.iter().all(|s| s.len() == height),
"The column lengths in the DataFrame are not equal."
);

let str_truncate = std::env::var(FMT_STR_LEN)
.as_deref()
.unwrap_or("")
Expand All @@ -363,14 +356,12 @@ impl Display for DataFrame {
.parse()
.map_or(8, |n: i64| if n < 0 { self.width() } else { n as usize });

let max_n_rows = {
let max_n_rows = std::env::var(FMT_MAX_ROWS)
.as_deref()
.unwrap_or("")
.parse()
.map_or(8, |n: i64| if n < 0 { height } else { n as usize });
std::cmp::max(max_n_rows, 2)
};
let max_n_rows = std::env::var(FMT_MAX_ROWS)
.as_deref()
.unwrap_or("")
.parse()
.map_or(8, |n: i64| if n < 0 { height } else { n as usize });

let (n_first, n_last) = if self.width() > max_n_cols {
((max_n_cols + 1) / 2, max_n_cols / 2)
} else {
Expand All @@ -383,27 +374,27 @@ impl Display for DataFrame {
let name = make_str_val(f.name(), str_truncate);
let lower_bounds = std::cmp::max(5, std::cmp::min(12, name.len()));
let mut column_name = name;
if env_is_true("POLARS_FMT_TABLE_HIDE_COLUMN_NAMES") {
if env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES) {
column_name = "".to_string();
}
let column_data_type = if env_is_true("POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES") {
let column_data_type = if env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES) {
"".to_string()
} else if env_is_true("POLARS_FMT_TABLE_INLINE_COLUMN_DATA_TYPE")
| env_is_true("POLARS_FMT_TABLE_HIDE_COLUMN_NAMES")
} else if env_is_true(FMT_TABLE_INLINE_COLUMN_DATA_TYPE)
| env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES)
{
format!("{}", f.data_type())
} else {
format!("\n{}", f.data_type())
};
let mut column_separator = "\n---";
if env_is_true("POLARS_FMT_TABLE_HIDE_COLUMN_SEPARATOR")
| env_is_true("POLARS_FMT_TABLE_HIDE_COLUMN_NAMES")
| env_is_true("POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES")
if env_is_true(FMT_TABLE_HIDE_COLUMN_SEPARATOR)
| env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES)
| env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES)
{
column_separator = ""
}
let s = if env_is_true("POLARS_FMT_TABLE_INLINE_COLUMN_DATA_TYPE")
& !env_is_true("POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES")
let s = if env_is_true(FMT_TABLE_INLINE_COLUMN_DATA_TYPE)
& !env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES)
{
format!("{} ({})", column_name, column_data_type)
} else {
Expand All @@ -430,7 +421,7 @@ impl Display for DataFrame {
names.push(s);
constraints.push(tbl_lower_bounds(l));
}
let preset = match std::env::var("POLARS_FMT_TABLE_FORMATTING")
let preset = match std::env::var(FMT_TABLE_FORMATTING)
.as_deref()
.unwrap_or("DEFAULT")
{
Expand All @@ -441,6 +432,7 @@ impl Display for DataFrame {
"ASCII_HORIZONTAL_ONLY" => ASCII_HORIZONTAL_ONLY,
"ASCII_MARKDOWN" => ASCII_MARKDOWN,
"UTF8_FULL" => UTF8_FULL,
"UTF8_FULL_CONDENSED" => UTF8_FULL_CONDENSED,
"UTF8_NO_BORDERS" => UTF8_NO_BORDERS,
"UTF8_BORDERS_ONLY" => UTF8_BORDERS_ONLY,
"UTF8_HORIZONTAL_ONLY" => UTF8_HORIZONTAL_ONLY,
Expand All @@ -454,33 +446,40 @@ impl Display for DataFrame {
.load_preset(preset)
.set_content_arrangement(ContentArrangement::Dynamic);

let mut rows = Vec::with_capacity(max_n_rows);
if self.height() > max_n_rows {
for i in 0..(max_n_rows / 2) {
let row = self.columns.iter().map(|s| s.str_value(i)).collect();
rows.push(prepare_row(row, n_first, n_last, str_truncate));
}
let dots = rows[0].iter().map(|_| "...".to_string()).collect();
rows.push(dots);
for i in (self.height() - (max_n_rows + 1) / 2)..self.height() {
let row = self.columns.iter().map(|s| s.str_value(i)).collect();
rows.push(prepare_row(row, n_first, n_last, str_truncate));
}
table.add_rows(rows);
} else {
for i in 0..self.height() {
if self.width() > 0 {
if max_n_rows > 0 {
if height > max_n_rows {
let mut rows = Vec::with_capacity(std::cmp::max(max_n_rows, 2));
for i in 0..std::cmp::max(max_n_rows / 2, 1) {
let row = self.columns.iter().map(|s| s.str_value(i)).collect();
table.add_row(prepare_row(row, n_first, n_last, str_truncate));
} else {
break;
rows.push(prepare_row(row, n_first, n_last, str_truncate));
}
let dots = rows[0].iter().map(|_| "...".to_string()).collect();
rows.push(dots);
if max_n_rows > 1 {
for i in (height - (max_n_rows + 1) / 2)..height {
let row = self.columns.iter().map(|s| s.str_value(i)).collect();
rows.push(prepare_row(row, n_first, n_last, str_truncate));
}
}
table.add_rows(rows);
} else {
for i in 0..height {
if self.width() > 0 {
let row = self.columns.iter().map(|s| s.str_value(i)).collect();
table.add_row(prepare_row(row, n_first, n_last, str_truncate));
} else {
break;
}
}
}
} else if height > 0 {
let dots: Vec<String> = self.columns.iter().map(|_| "...".to_string()).collect();
table.add_row(dots);
}

// insert a header row, unless both column names and column data types are already hidden
if !(env_is_true("POLARS_FMT_TABLE_HIDE_COLUMN_NAMES")
&& env_is_true("POLARS_FMT_TABLE_HIDE_COLUMN_DATA_TYPES"))
if !(env_is_true(FMT_TABLE_HIDE_COLUMN_NAMES)
&& env_is_true(FMT_TABLE_HIDE_COLUMN_DATA_TYPES))
{
table.set_header(names).set_constraints(constraints);
}
Expand All @@ -505,9 +504,9 @@ impl Display for DataFrame {
}

// set alignment of cells, if defined
if std::env::var("POLARS_FMT_TABLE_CELL_ALIGNMENT").is_ok() {
if std::env::var(FMT_TABLE_CELL_ALIGNMENT).is_ok() {
// for (column_index, column) in table.column_iter_mut().enumerate() {
let str_preset = std::env::var("POLARS_FMT_TABLE_CELL_ALIGNMENT")
let str_preset = std::env::var(FMT_TABLE_CELL_ALIGNMENT)
.unwrap_or_else(|_| "DEFAULT".to_string());
for column in table.column_iter_mut() {
if str_preset == "RIGHT" {
Expand All @@ -523,9 +522,9 @@ impl Display for DataFrame {
}

// establish 'shape' information (above/below/hidden)
if env_is_true("POLARS_FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION") {
if env_is_true(FMT_TABLE_HIDE_DATAFRAME_SHAPE_INFORMATION) {
write!(f, "{}", table)?;
} else if env_is_true("POLARS_FMT_TABLE_DATAFRAME_SHAPE_BELOW") {
} else if env_is_true(FMT_TABLE_DATAFRAME_SHAPE_BELOW) {
write!(f, "{}\nshape: {:?}", table, self.shape())?;
} else {
write!(f, "shape: {:?}\n{}", self.shape(), table)?;
Expand Down
32 changes: 20 additions & 12 deletions py-polars/polars/cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ def set_tbl_column_data_type_inline(cls, active: bool = True) -> type[Config]:
Examples
--------
>>> df = pl.DataFrame({"abc": [1.0, 2.5, 5.0], "xyz": [True, False, True]})
>>> pl.Config.set_tbl_dataframe_shape_below(True) # doctest: +SKIP
>>> pl.Config.set_tbl_column_data_type_inline(True) # doctest: +SKIP
# ...
# shape: (3, 2) shape: (3, 2)
# ┌─────┬───────┐ ┌───────────┬────────────┐
Expand Down Expand Up @@ -344,6 +344,7 @@ def set_tbl_formatting(
"ASCII_HORIZONTAL_ONLY",
"ASCII_MARKDOWN",
"UTF8_FULL",
"UTF8_FULL_CONDENSED",
"UTF8_NO_BORDERS",
"UTF8_BORDERS_ONLY",
"UTF8_HORIZONTAL_ONLY",
Expand All @@ -356,17 +357,24 @@ def set_tbl_formatting(
Parameters
----------
format : str
* "ASCII_FULL": ASCII borders / lines
* "ASCII_NO_BORDERS": ASCII no borders
* "ASCII_BORDERS_ONLY": ASCII borders only
* "ASCII_BORDERS_ONLY_CONDENSED": ASCII borders only condensed
* "ASCII_HORIZONTAL_ONLY": Horizontal lines only
* "ASCII_MARKDOWN": Markdown style
* "UTF8_FULL": UTF8 borders lines
* "UTF8_NO_BORDERS": UTF8 no borders
* "UTF8_BORDERS_ONLY": UTF8 borders only
* "UTF8_HORIZONTAL_ONLY": UTF8 horizontal only
* "NOTHING": No borders /lines
* "ASCII_FULL": ASCII, borders / lines.
* "ASCII_NO_BORDERS": ASCII, no borders.
* "ASCII_BORDERS_ONLY": ASCII, borders only.
* "ASCII_BORDERS_ONLY_CONDENSED": ASCII, borders only, dense row spacing.
* "ASCII_HORIZONTAL_ONLY": ASCII, horizontal lines only.
* "ASCII_MARKDOWN": ASCII, Markdown compatible.
* "UTF8_FULL": UTF8, with all borders and lines (default).
* "UTF8_FULL_CONDENSED": Same as UTF8_FULL, with dense row spacing.
* "UTF8_NO_BORDERS": UTF8, no borders.
* "UTF8_BORDERS_ONLY": UTF8, borders only.
* "UTF8_HORIZONTAL_ONLY": UTF8, horizontal lines only.
* "NOTHING": No borders or other lines.
Notes
-----
The UTF8 styles all use one or more of the semigraphic box-drawing characters
found in the Unicode Box Drawing block, which are not ASCII compatible:
https://en.wikipedia.org/wiki/Box-drawing_character#Box_Drawing
Raises
------
Expand Down
Loading

0 comments on commit ca6b2a2

Please sign in to comment.