From 948b4309c621d44254c7e6b8c22b141e78e247da Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Mon, 27 May 2024 16:45:55 +0200 Subject: [PATCH] better arrow formatting --- Cargo.lock | 77 +++++---- Cargo.toml | 4 +- crates/re_data_store/src/store_format.rs | 15 +- crates/re_format_arrow/Cargo.toml | 9 +- crates/re_format_arrow/src/lib.rs | 191 +++++++++++++++-------- crates/re_log_types/src/arrow_msg.rs | 2 + crates/re_log_types/src/data_cell.rs | 14 +- crates/re_log_types/src/data_row.rs | 12 +- crates/re_log_types/src/data_table.rs | 8 +- 9 files changed, 214 insertions(+), 118 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f009ea39d439..e11b9955cf1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +# This file is automatically @generated by Cargo.carglo version = 3 [[package]] @@ -1171,10 +1172,11 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.0" +version = "7.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" +checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" dependencies = [ + "crossterm", "strum", "strum_macros", "unicode-width", @@ -1382,6 +1384,28 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags 2.4.1", + "crossterm_winapi", + "libc", + "parking_lot", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "crunchy" version = "0.2.2" @@ -2625,9 +2649,9 @@ dependencies = [ [[package]] name = "indoc" -version = "2.0.4" +version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "infer" @@ -3127,9 +3151,9 @@ dependencies = [ [[package]] name = "multimap" -version = "0.8.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" [[package]] name = "naga" @@ -3822,9 +3846,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.12.3" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" dependencies = [ "bytes", "prost-derive", @@ -3832,13 +3856,13 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.12.3" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" +checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ "bytes", "heck 0.4.1", - "itertools 0.10.5", + "itertools 0.12.0", "log", "multimap", "once_cell", @@ -3849,17 +3873,16 @@ dependencies = [ "regex", "syn 2.0.48", "tempfile", - "which", ] [[package]] name = "prost-derive" -version = "0.12.3" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.12.0", "proc-macro2", "quote", "syn 2.0.48", @@ -3867,9 +3890,9 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.12.3" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "193898f59edcf43c26227dcd4c8427f00d99d61e95dcde58dabd49fa291d470e" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" dependencies = [ "prost", ] @@ -6168,18 +6191,18 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "strum" -version = "0.25.0" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" dependencies = [ "strum_macros", ] [[package]] name = "strum_macros" -version = "0.25.3" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" dependencies = [ "heck 0.4.1", "proc-macro2", @@ -7196,18 +7219,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix 0.38.24", -] - [[package]] name = "widestring" version = "1.0.2" diff --git a/Cargo.toml b/Cargo.toml index 400ec03eac0e..a494bfbcdf25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -207,8 +207,8 @@ similar-asserts = "1.4.2" slotmap = { version = "1.0.6", features = ["serde"] } smallvec = { version = "1.0", features = ["const_generics", "union"] } static_assertions = "1.1" -strum = { version = "0.25", features = ["derive"] } -strum_macros = "0.25" +strum = { version = "0.26", features = ["derive"] } +strum_macros = "0.26" sublime_fuzzy = "0.7" syn = "2.0" sysinfo = { version = "0.30.1", default-features = false } diff --git a/crates/re_data_store/src/store_format.rs b/crates/re_data_store/src/store_format.rs index 38a27854bd43..defcaca7199f 100644 --- a/crates/re_data_store/src/store_format.rs +++ b/crates/re_data_store/src/store_format.rs @@ -1,3 +1,4 @@ +use arrow2::datatypes::Metadata; use re_format::{format_bytes, format_uint}; use re_log_types::TimeInt; use re_types_core::SizeBytes as _; @@ -138,9 +139,10 @@ impl std::fmt::Display for IndexedBucket { re_log::error_once!("couldn't display indexed bucket: {err}"); std::fmt::Error })?; - re_format_arrow::format_table( - columns.columns(), - schema.fields.iter().map(|field| field.name.as_str()), + re_format_arrow::format_dataframe( + Metadata::default(), + &schema.fields, + columns.columns().iter().map(|array| &**array), ) .fmt(f)?; @@ -170,9 +172,10 @@ impl std::fmt::Display for StaticTable { re_log::error_once!("couldn't display static table: {err}"); std::fmt::Error })?; - re_format_arrow::format_table( - columns.columns(), - schema.fields.iter().map(|field| field.name.as_str()), + re_format_arrow::format_dataframe( + Metadata::default(), + &schema.fields, + columns.columns().iter().map(|array| &**array), ) .fmt(f)?; diff --git a/crates/re_format_arrow/Cargo.toml b/crates/re_format_arrow/Cargo.toml index 660251caa243..e1d0c32238e5 100644 --- a/crates/re_format_arrow/Cargo.toml +++ b/crates/re_format_arrow/Cargo.toml @@ -21,6 +21,13 @@ all-features = true [dependencies] arrow2.workspace = true -comfy-table.workspace = true re_tuid.workspace = true re_types_core.workspace = true # tuid serialization + +# native dependencies: +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +comfy-table = { workspace = true, features = ["tty"] } + +# web dependencies: +[target.'cfg(target_arch = "wasm32")'.dependencies] +comfy-table = { workspace = true } diff --git a/crates/re_format_arrow/src/lib.rs b/crates/re_format_arrow/src/lib.rs index e7f64b061b79..4d090a7e9f04 100644 --- a/crates/re_format_arrow/src/lib.rs +++ b/crates/re_format_arrow/src/lib.rs @@ -1,12 +1,12 @@ //! Formatting for tables of Arrow arrays -use std::fmt::Formatter; +use std::{borrow::Borrow, fmt::Formatter}; use arrow2::{ array::{get_display, Array, ListArray}, - datatypes::{DataType, IntervalUnit, TimeUnit}, + datatypes::{DataType, Field, IntervalUnit, Metadata, TimeUnit}, }; -use comfy_table::{presets, Cell, Table}; +use comfy_table::{presets, Cell, Row, Table}; use re_tuid::Tuid; use re_types_core::Loggable as _; @@ -21,7 +21,6 @@ use re_types_core::Loggable as _; type CustomFormatter<'a, F> = Box std::fmt::Result + 'a>; fn get_custom_display<'a, F: std::fmt::Write + 'a>( - _column_name: &'a str, array: &'a dyn Array, null: &'static str, ) -> CustomFormatter<'a, F> { @@ -107,9 +106,9 @@ impl std::fmt::Display for DisplayIntervalUnit { //TODO(john) move this and the Display impl upstream into arrow2 #[repr(transparent)] -struct DisplayDataType(DataType); +struct DisplayDatatype<'a>(&'a DataType); -impl std::fmt::Display for DisplayDataType { +impl std::fmt::Display for DisplayDatatype<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let s = match &self.0 { DataType::Null => "null", @@ -157,84 +156,141 @@ impl std::fmt::Display for DisplayDataType { DataType::Utf8 => "str", DataType::LargeUtf8 => "large-string", DataType::List(ref field) => { - let s = format!("list[{}]", Self(field.data_type().clone())); + let s = format!("list[{}]", Self(field.data_type())); return f.write_str(&s); } DataType::FixedSizeList(field, len) => { - let s = format!("fixed-list[{}; {len}]", Self(field.data_type().clone())); + let s = format!("fixed-list[{}; {len}]", Self(field.data_type())); return f.write_str(&s); } DataType::LargeList(field) => { - let s = format!("large-list[{}]", Self(field.data_type().clone())); + let s = format!("large-list[{}]", Self(field.data_type())); return f.write_str(&s); } DataType::Struct(fields) => return write!(f, "struct[{}]", fields.len()), DataType::Union(fields, _, _) => return write!(f, "union[{}]", fields.len()), - DataType::Map(field, _) => { - return write!(f, "map[{}]", Self(field.data_type().clone())) - } + DataType::Map(field, _) => return write!(f, "map[{}]", Self(field.data_type())), DataType::Dictionary(_, _, _) => "dict", DataType::Decimal(_, _) => "decimal", DataType::Decimal256(_, _) => "decimal256", - DataType::Extension(name, data_type, _) => { - let s = format!("extension<{name}>[{}]", Self((**data_type).clone())); - return f.write_str(&s); + DataType::Extension(name, _, _) => { + return f.write_str(trim_name(name)); } }; f.write_str(s) } } -/// Format `columns` into a [`Table`] using `names` as headers. -pub fn format_table(columns: Ia, names: In) -> Table +struct DisplayMetadata<'a>(&'a Metadata, &'a str); + +impl<'a> std::fmt::Display for DisplayMetadata<'a> { + #[inline] + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let Self(metadata, prefix) = self; + f.write_str( + &metadata + .iter() + .map(|(key, value)| format!("{prefix}{}: {value:?}", trim_name(key))) + .collect::>() + .join("\n"), + ) + } +} + +fn trim_name(name: &str) -> &str { + name.trim_start_matches("rerun.archetypes.") + .trim_start_matches("rerun.components.") + .trim_start_matches("rerun.datatypes.") + .trim_start_matches("rerun.controls.") + .trim_start_matches("rerun.blueprint.archetypes.") + .trim_start_matches("rerun.blueprint.components.") + .trim_start_matches("rerun.blueprint.datatypes.") + .trim_start_matches("rerun.field.") + .trim_start_matches("rerun.chunk.") + .trim_start_matches("rerun.") +} + +pub fn format_dataframe( + metadata: impl Borrow, + fields: impl IntoIterator, + columns: impl IntoIterator, +) -> Table where - A: AsRef, - Ia: IntoIterator, - N: AsRef, - In: IntoIterator, + F: Borrow, + C: Borrow, { - let mut table = Table::new(); - table.load_preset(presets::UTF8_FULL); - - const WIDTH_UPPER_BOUNDARY: u16 = 100; + let metadata = metadata.borrow(); - let names = names - .into_iter() - .map(|name| name.as_ref().to_owned()) + let fields = fields.into_iter().collect::>(); + let fields = fields + .iter() + .map(|field| field.borrow()) .collect::>(); - let arrays = columns.into_iter().collect::>(); - let (displayers, lengths): (Vec<_>, Vec<_>) = arrays + let columns = columns.into_iter().collect::>(); + let columns = columns .iter() - .zip(names.iter()) - .map(|(array, name)| { - let formatter = get_custom_display(name, array.as_ref(), "-"); - (formatter, array.as_ref().len()) - }) - .unzip(); - - if displayers.is_empty() { - return table; - } + .map(|column| column.borrow()) + .collect::>(); - let header = names - .iter() - .zip(arrays.iter().map(|array| array.as_ref().data_type())) - .map(|(name, data_type)| { + const MAXIMUM_CELL_CONTENT_WIDTH: u16 = 100; + + let mut outer_table = Table::new(); + outer_table.load_preset(presets::UTF8_FULL); + + let mut table = Table::new(); + table.load_preset(presets::UTF8_FULL); + + outer_table.add_row({ + let mut row = Row::new(); + row.add_cell({ + let cell = Cell::new(format!( + "CHUNK METADATA:\n{}", + DisplayMetadata(metadata, "* ") + )); + + #[cfg(not(target_arch = "wasm32"))] // requires TTY + { + cell.add_attribute(comfy_table::Attribute::Italic) + } + #[cfg(target_arch = "wasm32")] + { + cell + } + }); + row + }); + + let header = fields.iter().map(|field| { + if field.metadata.is_empty() { Cell::new(format!( - "{}\n---\n{}", - name.trim_start_matches("rerun.archetypes.") - .trim_start_matches("rerun.components.") - .trim_start_matches("rerun.datatypes.") - .trim_start_matches("rerun.controls.") - .trim_start_matches("rerun."), - DisplayDataType(data_type.clone()) + "{}\n---\ntype: \"{}\"", // NOLINT + trim_name(&field.name), + DisplayDatatype(field.data_type()), )) - }); + } else { + Cell::new(format!( + "{}\n---\ntype: \"{}\"\n{}", // NOLINT + trim_name(&field.name), + DisplayDatatype(field.data_type()), + DisplayMetadata(&field.metadata, ""), + )) + } + }); table.set_header(header); - for row in 0..lengths[0] { - let cells: Vec<_> = displayers + let displays = columns + .iter() + .map(|array| get_custom_display(&**array, "-")) + .collect::>(); + let num_rows = columns.first().map_or(0, |list_array| list_array.len()); + + if displays.is_empty() || num_rows == 0 { + return table; + } + + for row in 0..num_rows { + let cells: Vec<_> = displays .iter() .map(|disp| { let mut string = String::new(); @@ -243,11 +299,11 @@ where string.clear(); } let chars: Vec<_> = string.chars().collect(); - if chars.len() > WIDTH_UPPER_BOUNDARY as usize { + if chars.len() > MAXIMUM_CELL_CONTENT_WIDTH as usize { Cell::new( chars .into_iter() - .take(WIDTH_UPPER_BOUNDARY.saturating_sub(1).into()) + .take(MAXIMUM_CELL_CONTENT_WIDTH.saturating_sub(1).into()) .chain(['…']) .collect::(), ) @@ -259,16 +315,21 @@ where table.add_row(cells); } - table.set_content_arrangement(comfy_table::ContentArrangement::DynamicFullWidth); + table.set_content_arrangement(comfy_table::ContentArrangement::Dynamic); // NOTE: `Percentage` only works for terminals that report their sizes. - let width = if table.width().is_some() { - comfy_table::Width::Percentage((100.0 / arrays.len() as f32) as u16) - } else { - comfy_table::Width::Fixed(WIDTH_UPPER_BOUNDARY) - }; - table.set_constraints( - std::iter::repeat(comfy_table::ColumnConstraint::UpperBoundary(width)).take(arrays.len()), + if table.width().is_some() { + let percentage = comfy_table::Width::Percentage((100.0 / columns.len() as f32) as u16); + table.set_constraints( + std::iter::repeat(comfy_table::ColumnConstraint::UpperBoundary(percentage)) + .take(columns.len()), + ); + } + + outer_table.add_row(vec![table.trim_fmt()]); + outer_table.set_content_arrangement(comfy_table::ContentArrangement::Dynamic); + outer_table.set_constraints( + std::iter::repeat(comfy_table::ColumnConstraint::ContentWidth).take(columns.len()), ); - table + outer_table } diff --git a/crates/re_log_types/src/arrow_msg.rs b/crates/re_log_types/src/arrow_msg.rs index ea5e9059b4f7..d428c97b7042 100644 --- a/crates/re_log_types/src/arrow_msg.rs +++ b/crates/re_log_types/src/arrow_msg.rs @@ -14,6 +14,8 @@ use arrow2::{array::Array, chunk::Chunk, datatypes::Schema}; /// If the [`ArrowMsg`] has been cloned in a bunch of places, the callback will run for each and /// every instance. /// It is up to the callback implementer to handle this, if needed. +// +// TODO(#6412): probably don't need this anymore. #[allow(clippy::type_complexity)] #[derive(Clone)] pub struct ArrowChunkReleaseCallback(Arc>) + Send + Sync>); diff --git a/crates/re_log_types/src/data_cell.rs b/crates/re_log_types/src/data_cell.rs index f0a9cdec1345..302e5ff902ff 100644 --- a/crates/re_log_types/src/data_cell.rs +++ b/crates/re_log_types/src/data_cell.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use arrow2::datatypes::DataType; +use arrow2::datatypes::{DataType, Field, Metadata}; use re_types_core::{Component, ComponentBatch, ComponentName, DeserializationError, SizeBytes}; @@ -598,10 +598,14 @@ impl std::fmt::Display for DataCell { "DataCell({})", re_format::format_bytes(self.inner.size_bytes as _) ))?; - re_format_arrow::format_table( - // NOTE: wrap in a ListArray so that it looks more cell-like (i.e. single row) - [&*self.to_arrow_monolist()], - [self.component_name()], + re_format_arrow::format_dataframe( + Metadata::default(), + [Field::new( + self.component_name().to_string(), + self.datatype().clone(), + false, + )], + [self.to_arrow_monolist()], ) .fmt(f) } diff --git a/crates/re_log_types/src/data_row.rs b/crates/re_log_types/src/data_row.rs index db2b34879570..38dabf17134a 100644 --- a/crates/re_log_types/src/data_row.rs +++ b/crates/re_log_types/src/data_row.rs @@ -1,4 +1,5 @@ use ahash::HashSetExt; +use arrow2::datatypes::{Field, Metadata}; use nohash_hasher::IntSet; use smallvec::SmallVec; @@ -597,9 +598,16 @@ impl std::fmt::Display for DataRow { )?; } - re_format_arrow::format_table( + re_format_arrow::format_dataframe( + Metadata::default(), + self.cells.iter().map(|cell| { + Field::new( + cell.component_name().to_string(), + cell.datatype().clone(), + false, + ) + }), self.cells.iter().map(|cell| cell.to_arrow_monolist()), - self.cells.iter().map(|cell| cell.component_name()), ) .fmt(f) } diff --git a/crates/re_log_types/src/data_table.rs b/crates/re_log_types/src/data_table.rs index 43b556317f0d..334f106336d5 100644 --- a/crates/re_log_types/src/data_table.rs +++ b/crates/re_log_types/src/data_table.rs @@ -1054,10 +1054,10 @@ impl std::fmt::Display for DataTable { re_log::error_once!("couldn't display data table: {err}"); std::fmt::Error })?; - writeln!(f, "DataTable({}):", self.table_id)?; - re_format_arrow::format_table( - columns.columns(), - schema.fields.iter().map(|field| field.name.as_str()), + re_format_arrow::format_dataframe( + schema.metadata.clone(), + schema.fields.clone(), + columns.columns().iter().map(|x| x.as_ref()), ) .fmt(f) }