From ce361be42fb68a44ccd1870fa4516230c33eef28 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Wed, 28 Feb 2024 10:25:26 +0100 Subject: [PATCH] `enum` codegen for Python (#5319) ### What * Part of https://github.com/rerun-io/rerun/issues/3384 Even includes codegen of the arrow serialization, which is a first for Python. Best reviewed commit-by-commit ### Checklist * [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I've included a screenshot or gif (if applicable) * [x] I have tested the web demo (if applicable): * Using newly built examples: [app.rerun.io](https://app.rerun.io/pr/5319/index.html) * Using examples from latest `main` build: [app.rerun.io](https://app.rerun.io/pr/5319/index.html?manifest_url=https://app.rerun.io/version/main/examples_manifest.json) * Using full set of examples from `nightly` build: [app.rerun.io](https://app.rerun.io/pr/5319/index.html?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json) * [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG * [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)! 
- [PR Build Summary](https://build.rerun.io/pr/5319) - [Docs preview](https://rerun.io/preview/2f1d162e5f221d9c0a78b1bcff1524fec67e110c/docs) - [Examples preview](https://rerun.io/preview/2f1d162e5f221d9c0a78b1bcff1524fec67e110c/examples) - [Recent benchmark results](https://build.rerun.io/graphs/crates.html) - [Wasm size tracking](https://build.rerun.io/graphs/sizes.html) --- .../re_types/definitions/rerun/datatypes.fbs | 1 + .../rerun/testing/datatypes/enum.fbs | 22 ++ .../src/testing/datatypes/.gitattributes | 1 + .../src/testing/datatypes/enum_test.rs | 203 ++++++++++++ crates/re_types/src/testing/datatypes/mod.rs | 2 + crates/re_types_builder/src/codegen/common.rs | 21 +- .../src/codegen/python/mod.rs | 303 ++++++++++++------ .../re_types_builder/src/codegen/rust/api.rs | 6 +- .../src/codegen/rust/blueprint_validation.rs | 2 +- crates/re_types_builder/src/objects.rs | 22 +- rerun_cpp/tests/generated/datatypes.hpp | 1 + .../tests/generated/datatypes/.gitattributes | 2 + .../tests/generated/datatypes/enum_test.cpp | 64 ++++ .../tests/generated/datatypes/enum_test.hpp | 63 ++++ .../tests/test_types/datatypes/.gitattributes | 1 + .../tests/test_types/datatypes/__init__.py | 6 + .../tests/test_types/datatypes/enum_test.py | 95 ++++++ 17 files changed, 701 insertions(+), 114 deletions(-) create mode 100644 crates/re_types/definitions/rerun/testing/datatypes/enum.fbs create mode 100644 crates/re_types/src/testing/datatypes/enum_test.rs create mode 100644 rerun_cpp/tests/generated/datatypes/enum_test.cpp create mode 100644 rerun_cpp/tests/generated/datatypes/enum_test.hpp create mode 100644 rerun_py/tests/test_types/datatypes/enum_test.py diff --git a/crates/re_types/definitions/rerun/datatypes.fbs b/crates/re_types/definitions/rerun/datatypes.fbs index a58086f2b59b..334ad14deedc 100644 --- a/crates/re_types/definitions/rerun/datatypes.fbs +++ b/crates/re_types/definitions/rerun/datatypes.fbs @@ -32,5 +32,6 @@ include "./datatypes/vec2d.fbs"; include 
"./datatypes/vec3d.fbs"; include "./datatypes/vec4d.fbs"; +include "./testing/datatypes/enum.fbs"; namespace rerun.datatypes; diff --git a/crates/re_types/definitions/rerun/testing/datatypes/enum.fbs b/crates/re_types/definitions/rerun/testing/datatypes/enum.fbs new file mode 100644 index 000000000000..e524e078d1d9 --- /dev/null +++ b/crates/re_types/definitions/rerun/testing/datatypes/enum.fbs @@ -0,0 +1,22 @@ +namespace rerun.testing.datatypes; + +/// A test of the enum type. +enum EnumTest: byte { + /// Great film. + Up, + + /// Feeling blue. + Down, + + /// Correct. + Right, + + /// It's what's remaining. + Left, + + /// It's the only way to go. + Forward, + + /// Baby's got it. + Back, +} diff --git a/crates/re_types/src/testing/datatypes/.gitattributes b/crates/re_types/src/testing/datatypes/.gitattributes index 65697d6023ea..c513271d6caa 100644 --- a/crates/re_types/src/testing/datatypes/.gitattributes +++ b/crates/re_types/src/testing/datatypes/.gitattributes @@ -9,6 +9,7 @@ affix_fuzzer22.rs linguist-generated=true affix_fuzzer3.rs linguist-generated=true affix_fuzzer4.rs linguist-generated=true affix_fuzzer5.rs linguist-generated=true +enum_test.rs linguist-generated=true flattened_scalar.rs linguist-generated=true mod.rs linguist-generated=true primitive_component.rs linguist-generated=true diff --git a/crates/re_types/src/testing/datatypes/enum_test.rs b/crates/re_types/src/testing/datatypes/enum_test.rs new file mode 100644 index 000000000000..974d2a2e2636 --- /dev/null +++ b/crates/re_types/src/testing/datatypes/enum_test.rs @@ -0,0 +1,203 @@ +// DO NOT EDIT! This file was auto-generated by crates/re_types_builder/src/codegen/rust/api.rs +// Based on "crates/re_types/definitions/rerun/testing/datatypes/enum.fbs". 
+ +#![allow(trivial_numeric_casts)] +#![allow(unused_imports)] +#![allow(unused_parens)] +#![allow(clippy::clone_on_copy)] +#![allow(clippy::iter_on_single_items)] +#![allow(clippy::map_flatten)] +#![allow(clippy::match_wildcard_for_single_variants)] +#![allow(clippy::needless_question_mark)] +#![allow(clippy::new_without_default)] +#![allow(clippy::redundant_closure)] +#![allow(clippy::too_many_arguments)] +#![allow(clippy::too_many_lines)] +#![allow(clippy::unnecessary_cast)] + +use ::re_types_core::external::arrow2; +use ::re_types_core::ComponentName; +use ::re_types_core::SerializationResult; +use ::re_types_core::{ComponentBatch, MaybeOwnedComponentBatch}; +use ::re_types_core::{DeserializationError, DeserializationResult}; + +/// **Datatype**: A test of the enum type. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum EnumTest { + /// Great film. + Up = 1, + + /// Feeling blue. + Down = 2, + + /// Correct. + Right = 3, + + /// It's what's remaining. + Left = 4, + + /// It's the only way to go. + Forward = 5, + + /// Baby's got it. 
+ Back = 6, +} + +impl ::re_types_core::SizeBytes for EnumTest { + #[inline] + fn heap_size_bytes(&self) -> u64 { + 0 + } + + #[inline] + fn is_pod() -> bool { + true + } +} + +::re_types_core::macros::impl_into_cow!(EnumTest); + +impl ::re_types_core::Loggable for EnumTest { + type Name = ::re_types_core::DatatypeName; + + #[inline] + fn name() -> Self::Name { + "rerun.testing.datatypes.EnumTest".into() + } + + #[allow(clippy::wildcard_imports)] + #[inline] + fn arrow_datatype() -> arrow2::datatypes::DataType { + use arrow2::datatypes::*; + DataType::Union( + std::sync::Arc::new(vec![ + Field { + name: "_null_markers".to_owned(), + data_type: DataType::Null, + is_nullable: true, + metadata: [].into(), + }, + Field { + name: "Up".to_owned(), + data_type: DataType::Null, + is_nullable: false, + metadata: [].into(), + }, + Field { + name: "Down".to_owned(), + data_type: DataType::Null, + is_nullable: false, + metadata: [].into(), + }, + Field { + name: "Right".to_owned(), + data_type: DataType::Null, + is_nullable: false, + metadata: [].into(), + }, + Field { + name: "Left".to_owned(), + data_type: DataType::Null, + is_nullable: false, + metadata: [].into(), + }, + Field { + name: "Forward".to_owned(), + data_type: DataType::Null, + is_nullable: false, + metadata: [].into(), + }, + Field { + name: "Back".to_owned(), + data_type: DataType::Null, + is_nullable: false, + metadata: [].into(), + }, + ]), + Some(std::sync::Arc::new(vec![ + 0i32, 1i32, 2i32, 3i32, 4i32, 5i32, 6i32, + ])), + UnionMode::Sparse, + ) + } + + #[allow(clippy::wildcard_imports)] + fn to_arrow_opt<'a>( + data: impl IntoIterator>>>, + ) -> SerializationResult> + where + Self: Clone + 'a, + { + use ::re_types_core::{Loggable as _, ResultExt as _}; + use arrow2::{array::*, datatypes::*}; + Ok({ + let data: Vec<_> = data + .into_iter() + .map(|datum| { + let datum: Option<::std::borrow::Cow<'a, Self>> = datum.map(Into::into); + datum + }) + .collect(); + let num_variants = 6usize; + let types = data + 
.iter() + .map(|a| match a.as_deref() { + None => 0, + Some(value) => *value as i8, + }) + .collect(); + let fields: Vec<_> = + std::iter::repeat(NullArray::new(DataType::Null, data.len()).boxed()) + .take(1 + num_variants) + .collect(); + UnionArray::new( + ::arrow_datatype(), + types, + fields, + None, + ) + .boxed() + }) + } + + #[allow(clippy::wildcard_imports)] + fn from_arrow_opt( + arrow_data: &dyn arrow2::array::Array, + ) -> DeserializationResult>> + where + Self: Sized, + { + use ::re_types_core::{Loggable as _, ResultExt as _}; + use arrow2::{array::*, buffer::*, datatypes::*}; + Ok({ + let arrow_data = arrow_data + .as_any() + .downcast_ref::() + .ok_or_else(|| { + let expected = Self::arrow_datatype(); + let actual = arrow_data.data_type().clone(); + DeserializationError::datatype_mismatch(expected, actual) + }) + .with_context("rerun.testing.datatypes.EnumTest")?; + let arrow_data_types = arrow_data.types(); + arrow_data_types + .iter() + .map(|typ| match typ { + 0 => Ok(None), + 1 => Ok(Some(EnumTest::Up)), + 2 => Ok(Some(EnumTest::Down)), + 3 => Ok(Some(EnumTest::Right)), + 4 => Ok(Some(EnumTest::Left)), + 5 => Ok(Some(EnumTest::Forward)), + 6 => Ok(Some(EnumTest::Back)), + _ => Err(DeserializationError::missing_union_arm( + Self::arrow_datatype(), + "", + *typ as _, + )), + }) + .collect::>>() + .with_context("rerun.testing.datatypes.EnumTest")? 
+ }) + } +} diff --git a/crates/re_types/src/testing/datatypes/mod.rs b/crates/re_types/src/testing/datatypes/mod.rs index 87fac7ac1b1d..48371d8aeb6b 100644 --- a/crates/re_types/src/testing/datatypes/mod.rs +++ b/crates/re_types/src/testing/datatypes/mod.rs @@ -10,6 +10,7 @@ mod affix_fuzzer3_ext; mod affix_fuzzer4; mod affix_fuzzer4_ext; mod affix_fuzzer5; +mod enum_test; mod flattened_scalar; mod primitive_component; mod string_component; @@ -22,6 +23,7 @@ pub use self::affix_fuzzer22::AffixFuzzer22; pub use self::affix_fuzzer3::AffixFuzzer3; pub use self::affix_fuzzer4::AffixFuzzer4; pub use self::affix_fuzzer5::AffixFuzzer5; +pub use self::enum_test::EnumTest; pub use self::flattened_scalar::FlattenedScalar; pub use self::primitive_component::PrimitiveComponent; pub use self::string_component::StringComponent; diff --git a/crates/re_types_builder/src/codegen/common.rs b/crates/re_types_builder/src/codegen/common.rs index 5211944a1caa..8ed35fed7089 100644 --- a/crates/re_types_builder/src/codegen/common.rs +++ b/crates/re_types_builder/src/codegen/common.rs @@ -293,18 +293,29 @@ pub fn collect_examples_for_api_docs<'a>( } pub trait StringExt { - fn push_text(&mut self, text: impl AsRef, linefeeds: usize, indent: usize) -> &mut Self; - fn push_unindented_text(&mut self, text: impl AsRef, linefeeds: usize) -> &mut Self; + fn push_indented( + &mut self, + indent_level: usize, + text: impl AsRef, + linefeeds: usize, + ) -> &mut Self; + + fn push_unindented(&mut self, text: impl AsRef, linefeeds: usize) -> &mut Self; } impl StringExt for String { - fn push_text(&mut self, text: impl AsRef, linefeeds: usize, indent: usize) -> &mut Self { - self.push_str(&indent::indent_all_by(indent, text.as_ref())); + fn push_indented( + &mut self, + indent_level: usize, + text: impl AsRef, + linefeeds: usize, + ) -> &mut Self { + self.push_str(&indent::indent_all_by(indent_level * 4, text.as_ref())); self.push_str(&vec!["\n"; linefeeds].join("")); self } - fn 
push_unindented_text(&mut self, text: impl AsRef, linefeeds: usize) -> &mut Self { + fn push_unindented(&mut self, text: impl AsRef, linefeeds: usize) -> &mut Self { self.push_str(&unindent::unindent(text.as_ref())); self.push_str(&vec!["\n"; linefeeds].join("")); self diff --git a/crates/re_types_builder/src/codegen/python/mod.rs b/crates/re_types_builder/src/codegen/python/mod.rs index ec7aa85cb897..3a8968f3686c 100644 --- a/crates/re_types_builder/src/codegen/python/mod.rs +++ b/crates/re_types_builder/src/codegen/python/mod.rs @@ -5,6 +5,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use anyhow::Context as _; use camino::{Utf8Path, Utf8PathBuf}; use itertools::Itertools; +use unindent::unindent; use crate::{ codegen::{ @@ -332,16 +333,16 @@ impl PythonCodeGenerator { .extend(names.iter().cloned()); let mut code = String::new(); - code.push_text(&format!("# {}", autogen_warning!()), 1, 0); + code.push_indented(0, &format!("# {}", autogen_warning!()), 1); if let Some(source_path) = obj.relative_filepath() { - code.push_text(&format!("# Based on {:?}.", format_path(source_path)), 2, 0); - code.push_text( + code.push_indented(0, &format!("# Based on {:?}.", format_path(source_path)), 2); + code.push_indented( + 0, &format!( "# You can extend this class by creating a {:?} class in {:?}.", ext_class.name, ext_class.file_name ), 2, - 0, ); } @@ -355,7 +356,7 @@ impl PythonCodeGenerator { ".." 
}; - code.push_unindented_text( + code.push_unindented( format!( " from __future__ import annotations @@ -401,7 +402,7 @@ impl PythonCodeGenerator { ); if ext_class.found { - code.push_unindented_text( + code.push_unindented( format!("from .{} import {}", ext_class.module_name, ext_class.name,), 1, ); @@ -420,34 +421,29 @@ impl PythonCodeGenerator { })) .collect(); for clause in import_clauses { - code.push_text(&clause, 1, 0); + code.push_indented(0, &clause, 1); } if !manifest.is_empty() { - code.push_unindented_text(format!("\n__all__ = [{manifest}]\n\n\n"), 0); + code.push_unindented(format!("\n__all__ = [{manifest}]\n\n\n"), 0); } let obj_code = match obj.class { crate::objects::ObjectClass::Struct => { code_for_struct(reporter, arrow_registry, &ext_class, objects, obj) } + crate::objects::ObjectClass::Enum => { + code_for_enum(reporter, arrow_registry, &ext_class, objects, obj) + } crate::objects::ObjectClass::Union => { code_for_union(arrow_registry, &ext_class, objects, obj) } - crate::objects::ObjectClass::Enum => { - reporter.error( - &obj.virtpath, - &obj.fqname, - "Enums are not implemented in Python", - ); - continue; - } }; - code.push_text(&obj_code, 1, 0); + code.push_indented(0, &obj_code, 1); if ext_class.has_deferred_patch_class { - code.push_unindented_text( + code.push_unindented( format!("{}.deferred_patch_class({})", ext_class.name, obj.name), 1, ); @@ -477,8 +473,8 @@ fn write_init_file( let path = kind_path.join("__init__.py"); let mut code = String::new(); let manifest = quote_manifest(mods.iter().flat_map(|(_, names)| names.iter())); - code.push_text(&format!("# {}", autogen_warning!()), 2, 0); - code.push_unindented_text( + code.push_indented(0, &format!("# {}", autogen_warning!()), 2); + code.push_unindented( " from __future__ import annotations @@ -487,10 +483,10 @@ fn write_init_file( ); for (module, names) in mods { let names = names.join(", "); - code.push_text(&format!("from .{module} import {names}"), 1, 0); + 
code.push_indented(0, &format!("from .{module} import {names}"), 1); } if !manifest.is_empty() { - code.push_unindented_text(format!("\n__all__ = [{manifest}]"), 0); + code.push_unindented(format!("\n__all__ = [{manifest}]"), 0); } files_to_write.insert(path, code); } @@ -502,7 +498,7 @@ fn lib_source_code(archetype_names: &[String]) -> String { let mut code = String::new(); - code += &unindent::unindent(&format!( + code += &unindent(&format!( r#" # {autogen_warning} @@ -560,7 +556,7 @@ fn code_for_struct( format!("converter={typ_unwrapped}Batch._required, # type: ignore[misc]\n") } } else if !default_converter.is_empty() { - code.push_text(&converter_function, 1, 0); + code.push_indented(0, &converter_function, 1); format!("converter={default_converter}") } else { String::new() @@ -595,7 +591,7 @@ fn code_for_struct( } if let Some(deprecation_notice) = obj.deprecation_notice() { - code.push_unindented_text(format!(r#"@deprecated("""{deprecation_notice}""")"#), 1); + code.push_unindented(format!(r#"@deprecated("""{deprecation_notice}""")"#), 1); } if !obj.is_delegating_component() { @@ -604,7 +600,7 @@ fn code_for_struct( } else { "init=False" }; - code.push_unindented_text(format!("@define({define_args})"), 1); + code.push_unindented(format!("@define({define_args})"), 1); } let superclass_decl = if superclasses.is_empty() { @@ -612,47 +608,47 @@ fn code_for_struct( } else { format!("({})", superclasses.join(",")) }; - code.push_unindented_text(format!("class {name}{superclass_decl}:"), 1); + code.push_unindented(format!("class {name}{superclass_decl}:"), 1); - code.push_text(quote_obj_docs(obj), 0, 4); + code.push_indented(1, quote_obj_docs(obj), 0); if ext_class.has_init { - code.push_text( + code.push_indented( + 1, format!("# __init__ can be found in {}", ext_class.file_name), 2, - 4, ); } else if obj.is_delegating_component() { - code.push_text( + code.push_indented( + 1, format!( "# You can define your own __init__ function as a member of {} in {}", 
ext_class.name, ext_class.file_name ), 2, - 4, ); } else { // In absence of a an extension class __init__ method, we don't *need* an __init__ method here. // But if we don't generate one, LSP will show the class's doc string instead of parameter documentation. - code.push_text(quote_init_method(reporter, obj, ext_class, objects), 2, 4); + code.push_indented(1, quote_init_method(reporter, obj, ext_class, objects), 2); } if obj.kind == ObjectKind::Archetype { - code.push_text(quote_clear_methods(obj), 2, 4); + code.push_indented(1, quote_clear_methods(obj), 2); } if obj.is_delegating_component() { - code.push_text( + code.push_indented( + 1, format!( "# Note: there are no fields here because {} delegates to datatypes.{}", obj.name, obj.delegate_datatype(objects).unwrap().name ), 1, - 4, ); - code.push_text("pass", 2, 4); + code.push_indented(1, "pass", 2); } else { // NOTE: We need to add required fields first, and then optional ones, otherwise mypy // complains. @@ -705,49 +701,159 @@ fn code_for_struct( ) }; - code.push_text(format!("{name}: {typ}"), 1, 4); + code.push_indented(1, format!("{name}: {typ}"), 1); // Generating docs for all the fields creates A LOT of visual noise in the API docs. 
let show_fields_in_docs = false; let doc_lines = lines_from_docs(&field.docs); if !doc_lines.is_empty() { if show_fields_in_docs { - code.push_text(quote_doc_lines(doc_lines), 0, 4); + code.push_indented(1, quote_doc_lines(doc_lines), 0); } else { // Still include it for those that are reading the source file: for line in doc_lines { - code.push_text(format!("# {line}"), 1, 4); + code.push_indented(1, format!("# {line}"), 1); } - code.push_text("#", 1, 4); - code.push_text( - "# (Docstring intentionally commented out to hide this field from the docs)", - 2, - 4, - ); + code.push_indented(1, "#", 1); + code.push_indented(1, "# (Docstring intentionally commented out to hide this field from the docs)", 2); } } } if *kind == ObjectKind::Archetype { - code.push_text("__str__ = Archetype.__str__", 1, 4); - code.push_text("__repr__ = Archetype.__repr__", 1, 4); + code.push_indented(1, "__str__ = Archetype.__str__", 1); + code.push_indented(1, "__repr__ = Archetype.__repr__", 1); } - code.push_text(quote_array_method_from_obj(ext_class, objects, obj), 1, 4); - code.push_text(quote_native_types_method_from_obj(objects, obj), 1, 4); + code.push_indented(1, quote_array_method_from_obj(ext_class, objects, obj), 1); + code.push_indented(1, quote_native_types_method_from_obj(objects, obj), 1); if *kind != ObjectKind::Archetype { - code.push_text(quote_aliases_from_object(obj), 1, 0); + code.push_indented(0, quote_aliases_from_object(obj), 1); } } match kind { ObjectKind::Archetype => (), ObjectKind::Datatype | ObjectKind::Component => { - code.push_text( - quote_arrow_support_from_obj(arrow_registry, ext_class, objects, obj), + code.push_indented( + 0, + quote_arrow_support_from_obj(arrow_registry, ext_class, objects, obj, None), 1, + ); + } + } + + code +} + +fn code_for_enum( + reporter: &Reporter, + arrow_registry: &ArrowRegistry, + ext_class: &ExtensionClass, + objects: &Objects, + obj: &Object, +) -> String { + assert_eq!(obj.class, ObjectClass::Enum); + 
assert_eq!(obj.kind, ObjectKind::Datatype); + + let Object { name, .. } = obj; + + let mut code = String::new(); + + code.push_unindented("from enum import Enum", 2); + + if let Some(deprecation_notice) = obj.deprecation_notice() { + code.push_unindented(format!(r#"@deprecated("""{deprecation_notice}""")"#), 1); + } + + code.push_str(&format!("class {name}(Enum):\n")); + code.push_indented(1, quote_obj_docs(obj), 0); + + for (i, variant) in obj.fields.iter().enumerate() { + let arrow_type_index = 1 + i; // plus-one to leave room for zero == `_null_markers` + let variant_name = variant.screaming_snake_case_name(); + code.push_indented(1, format!("{variant_name} = {arrow_type_index}"), 1); + + // Generating docs for all the fields creates A LOT of visual noise in the API docs. + let show_fields_in_docs = true; + let doc_lines = lines_from_docs(&variant.docs); + if !doc_lines.is_empty() { + if show_fields_in_docs { + code.push_indented(1, quote_doc_lines(doc_lines), 0); + } else { + // Still include it for those that are reading the source file: + for line in doc_lines { + code.push_indented(1, format!("# {line}"), 1); + } + code.push_indented(1, "#", 1); + code.push_indented( + 1, + "# (Docstring intentionally commented out to hide this field from the docs)", + 2, + ); + } + } + } + + code.push_unindented(format!("{name}Like = {name}"), 1); + code.push_unindented( + format!( + r#" + {name}ArrayLike = Union[ + {name}, + Sequence[{name}Like] + ] + "#, + ), + 2, + ); + + let native_to_pa_array_impl = unindent(&format!( + r##" + if isinstance(data, {name}): + data = [data] + + types: list[int] = [] + + for value in data: + if value is None: + types.append(0) + elif isinstance(value, {name}): + types.append(value.value) # Actual enum value + elif isinstance(value, int): + types.append(value) # By number + elif isinstance(value, str): + types.append({name}[value].value) # By name + else: + raise ValueError(f"Unknown {name} kind: {{value}}") + + return 
pa.UnionArray.from_buffers( + type=data_type, + length=len(data), + buffers=[ + None, + pa.array(types, type=pa.int8()).buffers()[1], + ], + ) + "## + )); + + match obj.kind { + ObjectKind::Archetype => { + reporter.error(&obj.virtpath, &obj.fqname, "An archetype cannot be an enum"); + } + ObjectKind::Component | ObjectKind::Datatype => { + code.push_indented( 0, + quote_arrow_support_from_obj( + arrow_registry, + ext_class, + objects, + obj, + Some(native_to_pa_array_impl), + ), + 1, ); } } @@ -795,10 +901,10 @@ fn code_for_union( }; if let Some(deprecation_notice) = obj.deprecation_notice() { - code.push_unindented_text(format!(r#"@deprecated("""{deprecation_notice}""")"#), 1); + code.push_unindented(format!(r#"@deprecated("""{deprecation_notice}""")"#), 1); } - code.push_unindented_text( + code.push_unindented( format!( r#" @@ -809,22 +915,22 @@ fn code_for_union( 0, ); - code.push_text(quote_obj_docs(obj), 0, 4); + code.push_indented(1, quote_obj_docs(obj), 0); if ext_class.has_init { - code.push_text( + code.push_indented( + 1, format!("# __init__ can be found in {}", ext_class.file_name), 2, - 4, ); } else { - code.push_text( + code.push_indented( + 1, format!( "# You can define your own __init__ function as a member of {} in {}", ext_class.name, ext_class.file_name ), 2, - 4, ); } @@ -868,12 +974,12 @@ fn code_for_union( }; // Note: mypy gets confused using staticmethods for field-converters - code.push_text( + code.push_indented( + 1, format!("inner: {inner_type} = field({converter} {type_ignore}\n)"), 1, - 4, ); - code.push_text(quote_doc_from_fields(objects, fields), 0, 4); + code.push_indented(1, quote_doc_from_fields(objects, fields), 0); // if there are duplicate types, we need to add a `kind` field to disambiguate the union if has_duplicate_types { @@ -883,16 +989,16 @@ fn code_for_union( .join(", "); let first_kind = &fields[0].snake_case_name(); - code.push_text( + code.push_indented( + 1, format!("kind: Literal[{kind_type}] = 
field(default={first_kind:?})"), 1, - 4, ); - code.push_text(quote_union_kind_from_fields(fields), 0, 4); + code.push_indented(1, quote_union_kind_from_fields(fields), 0); } - code.push_unindented_text(quote_union_aliases_from_object(obj, field_types.iter()), 1); + code.push_unindented(quote_union_aliases_from_object(obj, field_types.iter()), 1); match kind { ObjectKind::Archetype => (), @@ -900,10 +1006,10 @@ fn code_for_union( unreachable!("component may not be a union") } ObjectKind::Datatype => { - code.push_text( - quote_arrow_support_from_obj(arrow_registry, ext_class, objects, obj), - 1, + code.push_indented( 0, + quote_arrow_support_from_obj(arrow_registry, ext_class, objects, obj, None), + 1, ); } } @@ -1138,7 +1244,7 @@ fn quote_array_method_from_obj( } let field_name = &obj.fields[0].name; - unindent::unindent(&format!( + unindent(&format!( " def __array__(self, dtype: npt.DTypeLike=None) -> npt.NDArray[Any]: # You can define your own __array__ function as a member of {} in {} @@ -1170,7 +1276,7 @@ fn quote_native_types_method_from_obj(objects: &Objects, obj: &Object) -> String } let field_name = &obj.fields[0].name; - unindent::unindent(&format!( + unindent(&format!( " def __{typ}__(self) -> {typ}: return {typ}(self.{field_name}) @@ -1191,7 +1297,7 @@ fn quote_aliases_from_object(obj: &Object) -> String { let mut code = String::new(); - code.push_unindented_text( + code.push_unindented( &if let Some(aliases) = aliases { format!( r#" @@ -1210,7 +1316,7 @@ fn quote_aliases_from_object(obj: &Object) -> String { 1, ); - code.push_unindented_text( + code.push_unindented( format!( r#" {name}ArrayLike = Union[ @@ -1248,7 +1354,7 @@ fn quote_union_aliases_from_object<'a>( String::new() }; - unindent::unindent(&format!( + unindent(&format!( r#" if TYPE_CHECKING: {name}Like = Union[ @@ -1475,7 +1581,7 @@ fn quote_field_converter_from_field( // generate the converter function if field.is_nullable { - function.push_unindented_text( + function.push_unindented( 
format!( r#" def {converter_name}(x: {typ}Like | None) -> {typ} | None: @@ -1490,7 +1596,7 @@ fn quote_field_converter_from_field( 1, ); } else { - function.push_unindented_text( + function.push_unindented( format!( r#" def {converter_name}(x: {typ}Like) -> {typ}: @@ -1572,6 +1678,7 @@ fn quote_arrow_support_from_obj( ext_class: &ExtensionClass, objects: &Objects, obj: &Object, + native_to_pa_array_impl: Option, ) -> String { let Object { fqname, name, .. } = obj; @@ -1612,17 +1719,19 @@ fn quote_arrow_support_from_obj( let extension_batch = format!("{name}Batch"); let extension_type = format!("{name}Type"); - let override_ = if ext_class.has_native_to_pa_array { - format!( - "return {}.{NATIVE_TO_PA_ARRAY_METHOD}(data, data_type)", - ext_class.name - ) - } else { - format!( - "raise NotImplementedError # You need to implement {NATIVE_TO_PA_ARRAY_METHOD} in {}", - ext_class.file_name - ) - }; + let native_to_pa_array_impl = native_to_pa_array_impl.unwrap_or_else(|| { + if ext_class.has_native_to_pa_array { + format!( + "return {}.{NATIVE_TO_PA_ARRAY_METHOD}(data, data_type)", + ext_class.name + ) + } else { + format!( + "raise NotImplementedError # You need to implement {NATIVE_TO_PA_ARRAY_METHOD} in {}", + ext_class.file_name + ) + } + }); let type_superclass_decl = if type_superclasses.is_empty() { String::new() @@ -1638,7 +1747,7 @@ fn quote_arrow_support_from_obj( if obj.kind == ObjectKind::Datatype || obj.is_non_delegating_component() { // Datatypes and non-delegating components declare init - unindent::unindent(&format!( + let mut code = unindent(&format!( r#" class {extension_type}{type_superclass_decl}: _TYPE_NAME: str = "{fqname}" @@ -1653,12 +1762,13 @@ fn quote_arrow_support_from_obj( @staticmethod def _native_to_pa_array(data: {many_aliases}, data_type: pa.DataType) -> pa.Array: - {override_} "# - )) + )); + code.push_indented(2, native_to_pa_array_impl, 1); + code } else { // Delegating components are already inheriting from their base type - 
unindent::unindent(&format!( + unindent(&format!( r#" class {extension_type}{type_superclass_decl}: _TYPE_NAME: str = "{fqname}" @@ -1838,7 +1948,7 @@ fn quote_init_method( // Make sure Archetypes catch and log exceptions as a fallback let forwarding_call = if obj.kind == ObjectKind::Archetype { - unindent::unindent(&format!( + unindent(&format!( r#" with catch_and_log_exceptions(context=self.__class__.__name__): {forwarding_call} @@ -1868,7 +1978,7 @@ fn quote_clear_methods(obj: &Object) -> String { let classname = &obj.name; - unindent::unindent(&format!( + unindent(&format!( r#" def __attrs_clear__(self) -> None: """Convenience method for calling `__attrs_init__` with all `None`s.""" @@ -1956,8 +2066,11 @@ fn quote_arrow_field(field: &Field) -> String { metadata, } = field; + // The python Arrow API requires that all null-fields are marked as nullable: + let is_nullable = *is_nullable || *data_type == DataType::Null; + let datatype = quote_arrow_datatype(data_type); - let is_nullable = is_nullable.then_some("True").unwrap_or("False"); + let is_nullable = if is_nullable { "True" } else { "False" }; let metadata = quote_metadata_map(metadata); format!(r#"pa.field("{name}", {datatype}, nullable={is_nullable}, metadata={metadata})"#) diff --git a/crates/re_types_builder/src/codegen/rust/api.rs b/crates/re_types_builder/src/codegen/rust/api.rs index afc7acb8252d..44a2d9eac0ef 100644 --- a/crates/re_types_builder/src/codegen/rust/api.rs +++ b/crates/re_types_builder/src/codegen/rust/api.rs @@ -192,11 +192,11 @@ fn generate_object_file( match &token { // If this is a doc-comment block, be smart about it. 
proc_macro2::TokenTree::Punct(punct) if punct.as_char() == '#' => { - code.push_text(string_from_quoted(&acc), 1, 0); + code.push_indented(0, string_from_quoted(&acc), 1); acc = TokenStream::new(); acc.extend([token, tokens.next().unwrap()]); - code.push_text(acc.to_string(), 1, 0); + code.push_indented(0, acc.to_string(), 1); acc = TokenStream::new(); } _ => { @@ -205,7 +205,7 @@ fn generate_object_file( } } - code.push_text(string_from_quoted(&acc), 1, 0); + code.push_indented(0, string_from_quoted(&acc), 1); replace_doc_attrb_with_doc_comment(&code) } diff --git a/crates/re_types_builder/src/codegen/rust/blueprint_validation.rs b/crates/re_types_builder/src/codegen/rust/blueprint_validation.rs index c18c6af779db..bece23e7a4d0 100644 --- a/crates/re_types_builder/src/codegen/rust/blueprint_validation.rs +++ b/crates/re_types_builder/src/codegen/rust/blueprint_validation.rs @@ -53,7 +53,7 @@ pub(crate) fn generate_blueprint_validation( } }; - code.push_text(string_from_quoted(&is_valid_blueprint), 1, 0); + code.push_indented(0, string_from_quoted(&is_valid_blueprint), 1); files_to_write.insert( Utf8PathBuf::from("crates/re_viewer/src/blueprint/validation_gen/mod.rs"), diff --git a/crates/re_types_builder/src/objects.rs b/crates/re_types_builder/src/objects.rs index 148b10fa0d40..032f9edfdd76 100644 --- a/crates/re_types_builder/src/objects.rs +++ b/crates/re_types_builder/src/objects.rs @@ -71,23 +71,24 @@ impl Objects { // Validate fields types: Archetype consist of components, everything else consists of datatypes. for obj in this.objects.values() { for field in &obj.fields { + let virtpath = &field.virtpath; if let Some(field_type_fqname) = field.typ.fqname() { let field_obj = &this[field_type_fqname]; if obj.kind == ObjectKind::Archetype { assert!(field_obj.kind == ObjectKind::Component, - "Field {:?} (pointing to an instance of {:?}) is part of an archetypes but is not a component. 
Only components are allowed as fields on an Archetype.", + "{virtpath}: Field {:?} (pointing to an instance of {:?}) is part of an archetypes but is not a component. Only components are allowed as fields on an Archetype.", field.fqname, field_type_fqname ); } else { assert!(field_obj.kind == ObjectKind::Datatype, - "Field {:?} (pointing to an instance of {:?}) is part of a Component or Datatype but is itself not a Datatype. Only Archetype fields can be Components, all other fields have to be primitive or be a datatypes.", + "{virtpath}: Field {:?} (pointing to an instance of {:?}) is part of a Component or Datatype but is itself not a Datatype. Only Archetype fields can be Components, all other fields have to be primitive or be a datatypes.", field.fqname, field_type_fqname ); } } else { // Note that we *do* allow primitive fields on components for the moment. Not doing so creates a lot of bloat. assert!(obj.kind != ObjectKind::Archetype, - "Field {:?} is a primitive field which is part of an Archetype. Only Components are allowed on Archetypes.", + "{virtpath}: Field {:?} is a primitive field which is part of an Archetype. Only Components are allowed on Archetypes.", field.fqname); } } @@ -558,8 +559,8 @@ impl Object { let fields: Vec<_> = enm .values() .iter() - // NOTE: `BaseType::None` is only used by internal flatbuffers fields, we don't care. .filter(|val| { + // NOTE: `BaseType::None` is only used by internal flatbuffers fields, we don't care. is_enum || val .union_type() @@ -571,12 +572,8 @@ impl Object { }) .collect(); - if kind == ObjectKind::Component { - assert!( - fields.len() == 1, - "components must have exactly 1 field, but {fqname} has {}", - fields.len() - ); + if kind == ObjectKind::Component && fields.len() != 1 { + reporter.error(&virtpath, &fqname, "components must have exactly 1 field"); } Self { @@ -910,6 +907,11 @@ impl ObjectField { crate::to_snake_case(&self.name) } + /// The `SCREAMING_SNAKE_CASE` name of the field, e.g. 
`TRANSLATION_AND_MAT3X3`. + pub fn screaming_snake_case_name(&self) -> String { + self.snake_case_name().to_uppercase() + } + /// The `PascalCase` name of the field, e.g. `TranslationAndMat3x3`. pub fn pascal_case_name(&self) -> String { crate::to_pascal_case(&self.name) diff --git a/rerun_cpp/tests/generated/datatypes.hpp b/rerun_cpp/tests/generated/datatypes.hpp index a6e10873b315..5fae6036524e 100644 --- a/rerun_cpp/tests/generated/datatypes.hpp +++ b/rerun_cpp/tests/generated/datatypes.hpp @@ -10,6 +10,7 @@ #include "datatypes/affix_fuzzer3.hpp" #include "datatypes/affix_fuzzer4.hpp" #include "datatypes/affix_fuzzer5.hpp" +#include "datatypes/enum_test.hpp" #include "datatypes/flattened_scalar.hpp" #include "datatypes/primitive_component.hpp" #include "datatypes/string_component.hpp" diff --git a/rerun_cpp/tests/generated/datatypes/.gitattributes b/rerun_cpp/tests/generated/datatypes/.gitattributes index c687c2cb6541..93c41c082ab9 100644 --- a/rerun_cpp/tests/generated/datatypes/.gitattributes +++ b/rerun_cpp/tests/generated/datatypes/.gitattributes @@ -17,6 +17,8 @@ affix_fuzzer4.cpp linguist-generated=true affix_fuzzer4.hpp linguist-generated=true affix_fuzzer5.cpp linguist-generated=true affix_fuzzer5.hpp linguist-generated=true +enum_test.cpp linguist-generated=true +enum_test.hpp linguist-generated=true flattened_scalar.cpp linguist-generated=true flattened_scalar.hpp linguist-generated=true primitive_component.cpp linguist-generated=true diff --git a/rerun_cpp/tests/generated/datatypes/enum_test.cpp b/rerun_cpp/tests/generated/datatypes/enum_test.cpp new file mode 100644 index 000000000000..06ba03e09131 --- /dev/null +++ b/rerun_cpp/tests/generated/datatypes/enum_test.cpp @@ -0,0 +1,64 @@ +// DO NOT EDIT! This file was auto-generated by crates/re_types_builder/src/codegen/cpp/mod.rs +// Based on "crates/re_types/definitions/rerun/testing/datatypes/enum.fbs". 
+ +#include "enum_test.hpp" + +#include +#include + +namespace rerun { + const std::shared_ptr& Loggable::arrow_datatype() { + static const auto datatype = arrow::sparse_union({ + arrow::field("_null_markers", arrow::null(), true, nullptr), + arrow::field("Up", arrow::null(), false), + arrow::field("Down", arrow::null(), false), + arrow::field("Right", arrow::null(), false), + arrow::field("Left", arrow::null(), false), + arrow::field("Forward", arrow::null(), false), + arrow::field("Back", arrow::null(), false), + }); + return datatype; + } + + rerun::Error Loggable::fill_arrow_array_builder( + arrow::SparseUnionBuilder* builder, const datatypes::EnumTest* elements, size_t num_elements + ) { + if (builder == nullptr) { + return rerun::Error(ErrorCode::UnexpectedNullArgument, "Passed array builder is null."); + } + if (elements == nullptr) { + return rerun::Error( + ErrorCode::UnexpectedNullArgument, + "Cannot serialize null pointer to arrow array." + ); + } + + ARROW_RETURN_NOT_OK(builder->Reserve(static_cast(num_elements))); + for (size_t elem_idx = 0; elem_idx < num_elements; elem_idx += 1) { + const auto variant = elements[elem_idx]; + ARROW_RETURN_NOT_OK(builder->Append(static_cast(variant))); + } + + return Error::ok(); + } + + Result> Loggable::to_arrow( + const datatypes::EnumTest* instances, size_t num_instances + ) { + // TODO(andreas): Allow configuring the memory pool. 
+ arrow::MemoryPool* pool = arrow::default_memory_pool(); + auto datatype = arrow_datatype(); + + ARROW_ASSIGN_OR_RAISE(auto builder, arrow::MakeBuilder(datatype, pool)) + if (instances && num_instances > 0) { + RR_RETURN_NOT_OK(Loggable::fill_arrow_array_builder( + static_cast(builder.get()), + instances, + num_instances + )); + } + std::shared_ptr array; + ARROW_RETURN_NOT_OK(builder->Finish(&array)); + return array; + } +} // namespace rerun diff --git a/rerun_cpp/tests/generated/datatypes/enum_test.hpp b/rerun_cpp/tests/generated/datatypes/enum_test.hpp new file mode 100644 index 000000000000..647c87dd9da7 --- /dev/null +++ b/rerun_cpp/tests/generated/datatypes/enum_test.hpp @@ -0,0 +1,63 @@ +// DO NOT EDIT! This file was auto-generated by crates/re_types_builder/src/codegen/cpp/mod.rs +// Based on "crates/re_types/definitions/rerun/testing/datatypes/enum.fbs". + +#pragma once + +#include +#include +#include + +namespace arrow { + class Array; + class DataType; + class SparseUnionBuilder; +} // namespace arrow + +namespace rerun::datatypes { + /// **Datatype**: A test of the enum type. + enum class EnumTest : uint8_t { + + /// Great film. + Up = 1, + + /// Feeling blue. + Down = 2, + + /// Correct. + Right = 3, + + /// It's what's remaining. + Left = 4, + + /// It's the only way to go. + Forward = 5, + + /// Baby's got it. + Back = 6, + }; +} // namespace rerun::datatypes + +namespace rerun { + template + struct Loggable; + + /// \private + template <> + struct Loggable { + static constexpr const char Name[] = "rerun.testing.datatypes.EnumTest"; + + /// Returns the arrow data type this type corresponds to. + static const std::shared_ptr& arrow_datatype(); + + /// Fills an arrow array builder with an array of this type. + static rerun::Error fill_arrow_array_builder( + arrow::SparseUnionBuilder* builder, const datatypes::EnumTest* elements, + size_t num_elements + ); + + /// Serializes an array of `rerun::datatypes::EnumTest` into an arrow array. 
+ static Result> to_arrow( + const datatypes::EnumTest* instances, size_t num_instances + ); + }; +} // namespace rerun diff --git a/rerun_py/tests/test_types/datatypes/.gitattributes b/rerun_py/tests/test_types/datatypes/.gitattributes index d9813bcc1c60..e3dc46583103 100644 --- a/rerun_py/tests/test_types/datatypes/.gitattributes +++ b/rerun_py/tests/test_types/datatypes/.gitattributes @@ -10,6 +10,7 @@ affix_fuzzer22.py linguist-generated=true affix_fuzzer3.py linguist-generated=true affix_fuzzer4.py linguist-generated=true affix_fuzzer5.py linguist-generated=true +enum_test.py linguist-generated=true flattened_scalar.py linguist-generated=true primitive_component.py linguist-generated=true string_component.py linguist-generated=true diff --git a/rerun_py/tests/test_types/datatypes/__init__.py b/rerun_py/tests/test_types/datatypes/__init__.py index 118ed3c974cb..316d26fbde78 100644 --- a/rerun_py/tests/test_types/datatypes/__init__.py +++ b/rerun_py/tests/test_types/datatypes/__init__.py @@ -28,6 +28,7 @@ AffixFuzzer22Like, AffixFuzzer22Type, ) +from .enum_test import EnumTest, EnumTestArrayLike, EnumTestBatch, EnumTestLike, EnumTestType from .flattened_scalar import ( FlattenedScalar, FlattenedScalarArrayLike, @@ -91,6 +92,11 @@ "AffixFuzzer5Batch", "AffixFuzzer5Like", "AffixFuzzer5Type", + "EnumTest", + "EnumTestArrayLike", + "EnumTestBatch", + "EnumTestLike", + "EnumTestType", "FlattenedScalar", "FlattenedScalarArrayLike", "FlattenedScalarBatch", diff --git a/rerun_py/tests/test_types/datatypes/enum_test.py b/rerun_py/tests/test_types/datatypes/enum_test.py new file mode 100644 index 000000000000..f0e2800b1218 --- /dev/null +++ b/rerun_py/tests/test_types/datatypes/enum_test.py @@ -0,0 +1,95 @@ +# DO NOT EDIT! This file was auto-generated by crates/re_types_builder/src/codegen/python/mod.rs +# Based on "crates/re_types/definitions/rerun/testing/datatypes/enum.fbs". + +# You can extend this class by creating a "EnumTestExt" class in "enum_test_ext.py". 
+ +from __future__ import annotations + +from typing import Sequence, Union + +import pyarrow as pa +from rerun._baseclasses import BaseBatch, BaseExtensionType + +__all__ = ["EnumTest", "EnumTestArrayLike", "EnumTestBatch", "EnumTestLike", "EnumTestType"] + + +from enum import Enum + + +class EnumTest(Enum): + """**Datatype**: A test of the enum type.""" + + UP = 1 + """Great film.""" + + DOWN = 2 + """Feeling blue.""" + + RIGHT = 3 + """Correct.""" + + LEFT = 4 + """It's what's remaining.""" + + FORWARD = 5 + """It's the only way to go.""" + + BACK = 6 + """Baby's got it.""" + + +EnumTestLike = EnumTest +EnumTestArrayLike = Union[EnumTest, Sequence[EnumTestLike]] + + +class EnumTestType(BaseExtensionType): + _TYPE_NAME: str = "rerun.testing.datatypes.EnumTest" + + def __init__(self) -> None: + pa.ExtensionType.__init__( + self, + pa.sparse_union( + [ + pa.field("_null_markers", pa.null(), nullable=True, metadata={}), + pa.field("Up", pa.null(), nullable=True, metadata={}), + pa.field("Down", pa.null(), nullable=True, metadata={}), + pa.field("Right", pa.null(), nullable=True, metadata={}), + pa.field("Left", pa.null(), nullable=True, metadata={}), + pa.field("Forward", pa.null(), nullable=True, metadata={}), + pa.field("Back", pa.null(), nullable=True, metadata={}), + ] + ), + self._TYPE_NAME, + ) + + +class EnumTestBatch(BaseBatch[EnumTestArrayLike]): + _ARROW_TYPE = EnumTestType() + + @staticmethod + def _native_to_pa_array(data: EnumTestArrayLike, data_type: pa.DataType) -> pa.Array: + if isinstance(data, EnumTest): + data = [data] + + types: list[int] = [] + + for value in data: + if value is None: + types.append(0) + elif isinstance(value, EnumTest): + types.append(value.value) # Actual enum value + elif isinstance(value, int): + types.append(value) # By number + elif isinstance(value, str): + types.append(EnumTest[value].value) # By name + else: + raise ValueError(f"Unknown EnumTest kind: {value}") + + return pa.UnionArray.from_buffers( + type=data_type, + 
length=len(data), + buffers=[ + None, + pa.array(types, type=pa.int8()).buffers()[1], + ], + )