diff --git a/crates/store/re_types/definitions/rerun/blueprint/datatypes/component_column_selector.fbs b/crates/store/re_types/definitions/rerun/blueprint/datatypes/component_column_selector.fbs index e17c8d8d95c3..977b0ca4f269 100644 --- a/crates/store/re_types/definitions/rerun/blueprint/datatypes/component_column_selector.fbs +++ b/crates/store/re_types/definitions/rerun/blueprint/datatypes/component_column_selector.fbs @@ -5,6 +5,7 @@ namespace rerun.blueprint.datatypes; //TODO(ab, jleibs): this probably needs reunification with whatever structure the data out API uses. // Has to be a table because flatbuffer doesn't support strings in structs. table ComponentColumnSelector ( + "attr.python.aliases": "str", "attr.rust.derive": "Default, PartialEq, Eq, Hash", "attr.rerun.scope": "blueprint" ) { diff --git a/crates/store/re_types/definitions/rerun/blueprint/datatypes/filter_by_event.fbs b/crates/store/re_types/definitions/rerun/blueprint/datatypes/filter_by_event.fbs index b59d2e127596..e43d3a87f65a 100644 --- a/crates/store/re_types/definitions/rerun/blueprint/datatypes/filter_by_event.fbs +++ b/crates/store/re_types/definitions/rerun/blueprint/datatypes/filter_by_event.fbs @@ -4,8 +4,9 @@ namespace rerun.blueprint.datatypes; /// Configuration for the filter by event feature of the dataframe view. // Has to be a table because flatbuffer doesn't support strings in structs. table FilterByEvent ( - "attr.rust.derive": "Default, PartialEq, Eq", - "attr.rerun.scope": "blueprint" + "attr.python.aliases": "blueprint_datatypes.ComponentColumnSelectorLike", + "attr.rust.derive": "Default, PartialEq, Eq", + "attr.rerun.scope": "blueprint" ) { /// Whether the filter by event feature is active. 
active: rerun.datatypes.Bool (order: 100); diff --git a/crates/store/re_types/definitions/rerun/blueprint/datatypes/selected_columns.fbs b/crates/store/re_types/definitions/rerun/blueprint/datatypes/selected_columns.fbs index da87e4dcfd65..e15ab1509d5e 100644 --- a/crates/store/re_types/definitions/rerun/blueprint/datatypes/selected_columns.fbs +++ b/crates/store/re_types/definitions/rerun/blueprint/datatypes/selected_columns.fbs @@ -4,6 +4,7 @@ namespace rerun.blueprint.datatypes; /// List of selected columns in a dataframe. // Has to be a table because flatbuffer doesn't support strings in structs. table SelectedColumns ( + "attr.python.aliases": "Sequence[Union[blueprint_datatypes.ComponentColumnSelectorLike, datatypes.Utf8Like]]", "attr.rust.derive": "Default, PartialEq, Eq", "attr.rerun.scope": "blueprint" ) { diff --git a/crates/viewer/re_space_view_dataframe/src/view_query_v2/blueprint.rs b/crates/viewer/re_space_view_dataframe/src/view_query_v2/blueprint.rs index b5d981665cb0..b5bcce76cfae 100644 --- a/crates/viewer/re_space_view_dataframe/src/view_query_v2/blueprint.rs +++ b/crates/viewer/re_space_view_dataframe/src/view_query_v2/blueprint.rs @@ -3,7 +3,7 @@ use std::collections::HashSet; use crate::dataframe_ui::HideColumnAction; use crate::view_query_v2::QueryV2; use re_chunk_store::{ColumnDescriptor, ColumnSelector}; -use re_log_types::{TimeInt, TimelineName}; +use re_log_types::{EntityPath, TimeInt, TimelineName}; use re_types::blueprint::{components, datatypes}; use re_viewer_context::{SpaceViewSystemExecutionError, ViewerContext}; @@ -168,7 +168,15 @@ impl QueryV2 { .iter() .map(|timeline_name| timeline_name.as_str().into()) .collect(); - let selected_component_columns = component_columns.iter().cloned().collect::<HashSet<_>>(); + let selected_component_columns = component_columns + .iter() + .map(|selector| { + ( + EntityPath::from(selector.entity_path.as_str()), + selector.component.as_str(), + ) + }) + .collect::<HashSet<_>>(); let query_timeline_name = 
*self.timeline(ctx)?.name(); let result = view_columns @@ -181,12 +189,16 @@ impl QueryV2 { || selected_time_columns.contains(desc.timeline.name()) } ColumnDescriptor::Component(desc) => { - let blueprint_component_descriptor = components::ComponentColumnSelector::new( - &desc.entity_path, - desc.component_name, - ); - - selected_component_columns.contains(&blueprint_component_descriptor) + // Check against both the full name and short name, as the user might have used + // the latter in the blueprint API. + // + // TODO(ab): this means that if the user chooses `"/foo/bar:Scalar"`, it will + // select both `rerun.components.Scalar` and `Scalar`, should both of these + // exist. + selected_component_columns + .contains(&(desc.entity_path.clone(), desc.component_name.full_name())) + || selected_component_columns + .contains(&(desc.entity_path.clone(), desc.component_name.short_name())) } }) .cloned() diff --git a/docs/snippets/all/views/dataframe.py b/docs/snippets/all/views/dataframe.py index 6423ea180a32..569835fa4581 100644 --- a/docs/snippets/all/views/dataframe.py +++ b/docs/snippets/all/views/dataframe.py @@ -8,21 +8,24 @@ rr.init("rerun_example_dataframe", spawn=True) # Log some data. 
-rr.log("trig/sin", rr.SeriesLine(color=[255, 0, 0], name="sin(0.01t)"), static=True) -rr.log("trig/cos", rr.SeriesLine(color=[0, 255, 0], name="cos(0.01t)"), static=True) for t in range(0, int(math.pi * 4 * 100.0)): rr.set_time_seconds("t", t) rr.log("trig/sin", rr.Scalar(math.sin(float(t) / 100.0))) rr.log("trig/cos", rr.Scalar(math.cos(float(t) / 100.0))) + # some sparse data + if t % 5 == 0: + rr.log("trig/tan_sparse", rr.Scalar(math.tan(float(t) / 100.0))) + # Create a Dataframe View blueprint = rrb.Blueprint( rrb.DataframeView( origin="/trig", - # TODO(#6896): improve `DataframeQueryV2` API and showcase more features query=rrb.archetypes.DataframeQueryV2( timeline="t", - range_filter=rrb.components.RangeFilter(start=rr.TimeInt(seconds=0), end=rr.TimeInt(seconds=20)), + filter_by_range=(rr.TimeInt(seconds=0), rr.TimeInt(seconds=20)), + filter_by_event="/trig/tan_sparse:Scalar", + select=["t", "log_tick", "/trig/sin:Scalar", "/trig/cos:Scalar", "/trig/tan_sparse:Scalar"], ), ), ) diff --git a/rerun_py/rerun_sdk/rerun/blueprint/archetypes/dataframe_query_v2.py b/rerun_py/rerun_sdk/rerun/blueprint/archetypes/dataframe_query_v2.py index 560767f33717..9c7e702f1df4 100644 --- a/rerun_py/rerun_sdk/rerun/blueprint/archetypes/dataframe_query_v2.py +++ b/rerun_py/rerun_sdk/rerun/blueprint/archetypes/dataframe_query_v2.py @@ -5,66 +5,22 @@ from __future__ import annotations -from typing import Any - from attrs import define, field -from ... 
import datatypes from ..._baseclasses import ( Archetype, ) -from ...blueprint import components as blueprint_components, datatypes as blueprint_datatypes -from ...error_utils import catch_and_log_exceptions +from ...blueprint import components as blueprint_components +from .dataframe_query_v2_ext import DataframeQueryV2Ext __all__ = ["DataframeQueryV2"] @define(str=False, repr=False, init=False) -class DataframeQueryV2(Archetype): +class DataframeQueryV2(DataframeQueryV2Ext, Archetype): """**Archetype**: The query for the dataframe view.""" - def __init__( - self: Any, - *, - timeline: datatypes.Utf8Like | None = None, - filter_by_range: blueprint_datatypes.FilterByRangeLike | None = None, - filter_by_event: blueprint_datatypes.FilterByEventLike | None = None, - apply_latest_at: datatypes.BoolLike | None = None, - select: blueprint_datatypes.SelectedColumnsLike | None = None, - ): - """ - Create a new instance of the DataframeQueryV2 archetype. - - Parameters - ---------- - timeline: - The timeline for this query. - - If unset, the timeline currently active on the time panel is used. - filter_by_range: - If provided, only rows whose timestamp is within this range will be shown. - - Note: will be unset as soon as `timeline` is changed. - filter_by_event: - If provided, only show rows which contains a logged event for the specified component. - apply_latest_at: - Should empty cells be filled with latest-at queries? - select: - Selected columns. If unset, all columns are selected. 
- - """ - - # You can define your own __init__ function as a member of DataframeQueryV2Ext in dataframe_query_v2_ext.py - with catch_and_log_exceptions(context=self.__class__.__name__): - self.__attrs_init__( - timeline=timeline, - filter_by_range=filter_by_range, - filter_by_event=filter_by_event, - apply_latest_at=apply_latest_at, - select=select, - ) - return - self.__attrs_clear__() + # __init__ can be found in dataframe_query_v2_ext.py def __attrs_clear__(self) -> None: """Convenience method for calling `__attrs_init__` with all `None`s.""" diff --git a/rerun_py/rerun_sdk/rerun/blueprint/archetypes/dataframe_query_v2_ext.py b/rerun_py/rerun_sdk/rerun/blueprint/archetypes/dataframe_query_v2_ext.py new file mode 100644 index 000000000000..61aa75593b20 --- /dev/null +++ b/rerun_py/rerun_sdk/rerun/blueprint/archetypes/dataframe_query_v2_ext.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +from typing import Any + +from ... import datatypes +from ...blueprint import components as blueprint_components, datatypes as blueprint_datatypes +from ...error_utils import catch_and_log_exceptions + + +class DataframeQueryV2Ext: + """Extension for [DataframeQueryV2][rerun.blueprint.archetypes.DataframeQueryV2].""" + + def __init__( + self: Any, + *, + timeline: datatypes.Utf8Like | None = None, + filter_by_range: tuple[datatypes.TimeInt, datatypes.TimeInt] + | blueprint_datatypes.FilterByRangeLike + | None = None, + filter_by_event: blueprint_datatypes.ComponentColumnSelectorLike | None = None, + apply_latest_at: bool = False, + select: list[blueprint_datatypes.ComponentColumnSelectorLike | datatypes.Utf8Like | str] | None = None, + ): + """ + Create a new instance of the DataframeQueryV2 archetype. + + Parameters + ---------- + timeline: + The timeline for this query. + + filter_by_range: + If set, a range filter is applied. 
+ + filter_by_event: + If provided, the dataframe will only contain rows corresponding to timestamps at which an event was logged + for the provided column. + + apply_latest_at: + Should empty cells be filled with latest-at queries? + + select: + Selected columns. If unset, all columns are selected. + + """ + + if isinstance(filter_by_range, tuple): + start, end = filter_by_range + filter_by_range = blueprint_components.FilterByRange(start, end) + + if filter_by_event is not None: + if isinstance(filter_by_event, str): + column = blueprint_datatypes.ComponentColumnSelector(spec=filter_by_event) + else: + column = filter_by_event + + new_filter_by_event = blueprint_components.FilterByEvent(active=True, column=column) + else: + new_filter_by_event = None + + with catch_and_log_exceptions(context=self.__class__.__name__): + self.__attrs_init__( + timeline=timeline, + filter_by_range=filter_by_range, + filter_by_event=new_filter_by_event, + apply_latest_at=apply_latest_at, + select=select, + ) + return + self.__attrs_clear__() diff --git a/rerun_py/rerun_sdk/rerun/blueprint/datatypes/component_column_selector.py b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/component_column_selector.py index df6047fdc826..fdab0ed1c99c 100644 --- a/rerun_py/rerun_sdk/rerun/blueprint/datatypes/component_column_selector.py +++ b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/component_column_selector.py @@ -5,7 +5,7 @@ from __future__ import annotations -from typing import Any, Sequence, Union +from typing import TYPE_CHECKING, Any, Sequence, Union import pyarrow as pa from attrs import define, field @@ -15,6 +15,7 @@ BaseBatch, BaseExtensionType, ) +from .component_column_selector_ext import ComponentColumnSelectorExt __all__ = [ "ComponentColumnSelector", @@ -42,24 +43,10 @@ def _component_column_selector__component__special_field_converter_override(x: d @define(init=False) -class ComponentColumnSelector: +class ComponentColumnSelector(ComponentColumnSelectorExt): """**Datatype**: 
Describe a component column to be selected in the dataframe view.""" - def __init__(self: Any, entity_path: datatypes.EntityPathLike, component: datatypes.Utf8Like): - """ - Create a new instance of the ComponentColumnSelector datatype. - - Parameters - ---------- - entity_path: - The entity path for this component. - component: - The name of the component. - - """ - - # You can define your own __init__ function as a member of ComponentColumnSelectorExt in component_column_selector_ext.py - self.__attrs_init__(entity_path=entity_path, component=component) + # __init__ can be found in component_column_selector_ext.py entity_path: datatypes.EntityPath = field( converter=_component_column_selector__entity_path__special_field_converter_override @@ -74,7 +61,11 @@ def __init__(self: Any, entity_path: datatypes.EntityPathLike, component: dataty # (Docstring intentionally commented out to hide this field from the docs) -ComponentColumnSelectorLike = ComponentColumnSelector +if TYPE_CHECKING: + ComponentColumnSelectorLike = Union[ComponentColumnSelector, str] +else: + ComponentColumnSelectorLike = Any + ComponentColumnSelectorArrayLike = Union[ ComponentColumnSelector, Sequence[ComponentColumnSelectorLike], @@ -100,15 +91,4 @@ class ComponentColumnSelectorBatch(BaseBatch[ComponentColumnSelectorArrayLike]): @staticmethod def _native_to_pa_array(data: ComponentColumnSelectorArrayLike, data_type: pa.DataType) -> pa.Array: - from rerun.datatypes import EntityPathBatch, Utf8Batch - - if isinstance(data, ComponentColumnSelector): - data = [data] - - return pa.StructArray.from_arrays( - [ - EntityPathBatch([x.entity_path for x in data]).as_arrow_array().storage, # type: ignore[misc, arg-type] - Utf8Batch([x.component for x in data]).as_arrow_array().storage, # type: ignore[misc, arg-type] - ], - fields=list(data_type), - ) + return ComponentColumnSelectorExt.native_to_pa_array_override(data, data_type) diff --git 
a/rerun_py/rerun_sdk/rerun/blueprint/datatypes/component_column_selector_ext.py b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/component_column_selector_ext.py new file mode 100644 index 000000000000..7333bc0b87bc --- /dev/null +++ b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/component_column_selector_ext.py @@ -0,0 +1,87 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Sequence + +import pyarrow as pa + +from ... import datatypes + +if TYPE_CHECKING: + from .component_column_selector import ComponentColumnSelectorArrayLike + + +class ComponentColumnSelectorExt: + """Extension for [ComponentColumnSelector][rerun.blueprint.datatypes.ComponentColumnSelector].""" + + def __init__( + self: Any, + spec: str | None = None, + *, + entity_path: datatypes.EntityPathLike | None = None, + component: datatypes.Utf8Like | None = None, + ): + """ + Create a new instance of the ComponentColumnSelector datatype. + + Parameters + ---------- + spec: + A string in the format "/entity/path:ComponentName". If used, `entity_path` and `component` must be `None`. + + entity_path: + The column's entity path. If used, `spec` must be `None` and `component` must be provided. + + component: + The column's component name. If used, `spec` must be `None` and `entity_path` must be provided. + + """ + + if spec is not None: + if entity_path is not None or component is not None: + raise ValueError("Either `spec` or both `entity_path` and `component` must be provided.") + if not isinstance(spec, str): + raise ValueError(f"Unexpected input value (`spec` must be a string): {spec}") + entity_path, component = _parse_spec(spec) + else: + if entity_path is None or component is None: + raise ValueError("Both `entity_path` and `component` must be provided.") + + self.__attrs_init__(entity_path=entity_path, component=component) + + # Override needed to address the `str` case. 
+ @staticmethod + def native_to_pa_array_override(input_data: ComponentColumnSelectorArrayLike, data_type: pa.DataType) -> pa.Array: + from ...components import EntityPathBatch + from ...datatypes import Utf8Batch + from .component_column_selector import ComponentColumnSelector + + if isinstance(input_data, ComponentColumnSelector): + data: Sequence[ComponentColumnSelector] = [input_data] + else: + data = [ + item if isinstance(item, ComponentColumnSelector) else ComponentColumnSelector(item) + for item in input_data + ] + + return pa.StructArray.from_arrays( + [ + EntityPathBatch([x.entity_path for x in data]).as_arrow_array().storage, # type: ignore[misc, arg-type] + Utf8Batch([x.component for x in data]).as_arrow_array().storage, # type: ignore[misc, arg-type] + ], + fields=list(data_type), + ) + + +def _parse_spec(spec: str) -> tuple[datatypes.EntityPath, datatypes.Utf8]: + """ + Parse the component column specifier. + + Raises `ValueError` if the specifier is invalid. + """ + + try: + entity_path, component = spec.split(":") + except ValueError as e: + raise ValueError(f"Invalid component column specifier: {spec}") from e + + return datatypes.EntityPath(entity_path), datatypes.Utf8(component) diff --git a/rerun_py/rerun_sdk/rerun/blueprint/datatypes/filter_by_event.py b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/filter_by_event.py index a25367a4aa59..80196c26e116 100644 --- a/rerun_py/rerun_sdk/rerun/blueprint/datatypes/filter_by_event.py +++ b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/filter_by_event.py @@ -5,7 +5,7 @@ from __future__ import annotations -from typing import Any, Sequence, Union +from typing import TYPE_CHECKING, Any, Sequence, Union import pyarrow as pa from attrs import define, field @@ -16,6 +16,7 @@ BaseExtensionType, ) from ...blueprint import datatypes as blueprint_datatypes +from .filter_by_event_ext import FilterByEventExt __all__ = ["FilterByEvent", "FilterByEventArrayLike", "FilterByEventBatch", "FilterByEventLike", 
"FilterByEventType"] @@ -28,7 +29,7 @@ def _filter_by_event__active__special_field_converter_override(x: datatypes.Bool @define(init=False) -class FilterByEvent: +class FilterByEvent(FilterByEventExt): """**Datatype**: Configuration for the filter by event feature of the dataframe view.""" def __init__(self: Any, active: datatypes.BoolLike, column: blueprint_datatypes.ComponentColumnSelectorLike): @@ -58,7 +59,11 @@ def __init__(self: Any, active: datatypes.BoolLike, column: blueprint_datatypes. # (Docstring intentionally commented out to hide this field from the docs) -FilterByEventLike = FilterByEvent +if TYPE_CHECKING: + FilterByEventLike = Union[FilterByEvent, blueprint_datatypes.ComponentColumnSelectorLike] +else: + FilterByEventLike = Any + FilterByEventArrayLike = Union[ FilterByEvent, Sequence[FilterByEventLike], @@ -92,16 +97,4 @@ class FilterByEventBatch(BaseBatch[FilterByEventArrayLike]): @staticmethod def _native_to_pa_array(data: FilterByEventArrayLike, data_type: pa.DataType) -> pa.Array: - from rerun.blueprint.datatypes import ComponentColumnSelectorBatch - from rerun.datatypes import BoolBatch - - if isinstance(data, FilterByEvent): - data = [data] - - return pa.StructArray.from_arrays( - [ - BoolBatch([x.active for x in data]).as_arrow_array().storage, # type: ignore[misc, arg-type] - ComponentColumnSelectorBatch([x.column for x in data]).as_arrow_array().storage, # type: ignore[misc, arg-type] - ], - fields=list(data_type), - ) + return FilterByEventExt.native_to_pa_array_override(data, data_type) diff --git a/rerun_py/rerun_sdk/rerun/blueprint/datatypes/filter_by_event_ext.py b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/filter_by_event_ext.py new file mode 100644 index 000000000000..90107c4535eb --- /dev/null +++ b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/filter_by_event_ext.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pyarrow as pa + +from ...blueprint import components as 
blueprint_components + +if TYPE_CHECKING: + from .filter_by_event import FilterByEvent, FilterByEventArrayLike, FilterByEventLike + + +class _NotAFilterByEventLike(BaseException): + pass + + +class FilterByEventExt: + """Extension for [FilterByEvent][rerun.blueprint.datatypes.FilterByEvent].""" + + @staticmethod + def native_to_pa_array_override(input_data: FilterByEventArrayLike, data_type: pa.DataType) -> pa.Array: + from ...blueprint.datatypes import ComponentColumnSelectorBatch + from ...datatypes import BoolBatch + + try: + data = [_to_filter_by_event(input_data)] # type: ignore[arg-type] + except _NotAFilterByEventLike: + try: + data = [_to_filter_by_event(d) for d in input_data] # type: ignore[union-attr] + except _NotAFilterByEventLike: + raise ValueError(f"Unexpected input value: {input_data}") + + return pa.StructArray.from_arrays( + [ + BoolBatch([x.active for x in data]).as_arrow_array().storage, # type: ignore[misc, arg-type] + ComponentColumnSelectorBatch( + [x.column for x in data], + ) + .as_arrow_array() + .storage, # type: ignore[misc, arg-type] + ], + fields=list(data_type), + ) + + +def _to_filter_by_event(input_data: FilterByEventLike) -> FilterByEvent: + from .filter_by_event import FilterByEvent + + if isinstance(input_data, FilterByEvent): + return input_data + elif isinstance(input_data, str): + return FilterByEvent(active=True, column=blueprint_components.ComponentColumnSelector(spec=input_data)) + elif isinstance(input_data, blueprint_components.ComponentColumnSelector): + return FilterByEvent(active=True, column=input_data) + else: + raise _NotAFilterByEventLike() diff --git a/rerun_py/rerun_sdk/rerun/blueprint/datatypes/selected_columns.py b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/selected_columns.py index 3ddca3bf1e11..6e87d470a38a 100644 --- a/rerun_py/rerun_sdk/rerun/blueprint/datatypes/selected_columns.py +++ b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/selected_columns.py @@ -5,7 +5,7 @@ from __future__ import annotations 
-from typing import Any, Sequence, Union +from typing import TYPE_CHECKING, Any, Sequence, Union import pyarrow as pa from attrs import define, field @@ -16,6 +16,7 @@ BaseExtensionType, ) from ...blueprint import datatypes as blueprint_datatypes +from .selected_columns_ext import SelectedColumnsExt __all__ = [ "SelectedColumns", @@ -27,28 +28,10 @@ @define(init=False) -class SelectedColumns: +class SelectedColumns(SelectedColumnsExt): """**Datatype**: List of selected columns in a dataframe.""" - def __init__( - self: Any, - time_columns: datatypes.Utf8ArrayLike, - component_columns: blueprint_datatypes.ComponentColumnSelectorArrayLike, - ): - """ - Create a new instance of the SelectedColumns datatype. - - Parameters - ---------- - time_columns: - The time columns to include - component_columns: - The component columns to include - - """ - - # You can define your own __init__ function as a member of SelectedColumnsExt in selected_columns_ext.py - self.__attrs_init__(time_columns=time_columns, component_columns=component_columns) + # __init__ can be found in selected_columns_ext.py time_columns: list[datatypes.Utf8] = field() # The time columns to include @@ -61,7 +44,13 @@ def __init__( # (Docstring intentionally commented out to hide this field from the docs) -SelectedColumnsLike = SelectedColumns +if TYPE_CHECKING: + SelectedColumnsLike = Union[ + SelectedColumns, Sequence[Union[blueprint_datatypes.ComponentColumnSelectorLike, datatypes.Utf8Like]] + ] +else: + SelectedColumnsLike = Any + SelectedColumnsArrayLike = Union[ SelectedColumns, Sequence[SelectedColumnsLike], @@ -107,6 +96,4 @@ class SelectedColumnsBatch(BaseBatch[SelectedColumnsArrayLike]): @staticmethod def _native_to_pa_array(data: SelectedColumnsArrayLike, data_type: pa.DataType) -> pa.Array: - raise NotImplementedError( - "Arrow serialization of SelectedColumns not implemented: We lack codegen for arrow-serialization of general structs" - ) # You need to implement native_to_pa_array_override in 
selected_columns_ext.py + return SelectedColumnsExt.native_to_pa_array_override(data, data_type) diff --git a/rerun_py/rerun_sdk/rerun/blueprint/datatypes/selected_columns_ext.py b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/selected_columns_ext.py new file mode 100644 index 000000000000..88ec1fe57e7f --- /dev/null +++ b/rerun_py/rerun_sdk/rerun/blueprint/datatypes/selected_columns_ext.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +import itertools +from typing import TYPE_CHECKING, Any, Iterable, Sequence + +import pyarrow as pa + +from ... import datatypes +from ...blueprint import components as blueprint_components, datatypes as blueprint_datatypes + +if TYPE_CHECKING: + from .selected_columns import SelectedColumnsArrayLike + + +class SelectedColumnsExt: + """Extension for [SelectedColumns][rerun.blueprint.datatypes.SelectedColumns].""" + + def __init__( + self: Any, columns: Sequence[blueprint_datatypes.ComponentColumnSelectorLike | datatypes.Utf8Like] + ) -> None: + """ + Create a new instance of the `SelectedColumns` datatype. + + Example: + ```python + SelectedColumns(["timeline", "/entity/path:ComponentName"]) + ``` + + Parameters + ---------- + columns: + The columns to include. + + Each column must be of either the timeline or the component kind. Timeline columns can be specified using a + `str` without any `:`, or a `Utf8`. Component columns can be specified using either a `str` in the form of + `"/entity/path:ComponentName"`, or a `ComponentColumnSelector`. 
+ + """ + + time_columns: list[datatypes.Utf8] = [] + component_columns: list[blueprint_datatypes.ComponentColumnSelector] = [] + + for column in columns: + if isinstance(column, blueprint_components.ComponentColumnSelector): + component_columns.append(column) + elif isinstance(column, datatypes.Utf8): + time_columns.append(column) + elif isinstance(column, str): + try: + comp_col = blueprint_components.ComponentColumnSelector(spec=column) + component_columns.append(comp_col) + except ValueError: + time_columns.append(datatypes.Utf8(column)) + else: + raise ValueError(f"Unexpected column type: {column}") + + self.__attrs_init__(time_columns=time_columns, component_columns=component_columns) + + @staticmethod + def native_to_pa_array_override(input_data: SelectedColumnsArrayLike, data_type: pa.DataType) -> pa.Array: + from ...blueprint.components import ComponentColumnSelectorBatch + from ...datatypes import Utf8Batch + from .selected_columns import SelectedColumns + + if isinstance(input_data, SelectedColumns): + data = [input_data] + else: + # if we're a sequence, chances are we are the input of a single SelectedColumns… + try: + data = [SelectedColumns(input_data)] # type: ignore[arg-type] + except ValueError: + # …but it could be that we're a sequence of SelectedColumns/inputs to SelectedColumns + data = [d if isinstance(d, SelectedColumns) else SelectedColumns(d) for d in input_data] + + time_columns = pa.ListArray.from_arrays( + offsets=_compute_offsets(d.time_columns for d in data), + values=Utf8Batch( + list(itertools.chain.from_iterable(d.time_columns for d in data)), + ) + .as_arrow_array() + .storage, + type=data_type.field(0).type, + ) + + component_columns = pa.ListArray.from_arrays( + offsets=_compute_offsets(d.component_columns for d in data), + values=ComponentColumnSelectorBatch( + list(itertools.chain.from_iterable(d.component_columns for d in data)), + ) + .as_arrow_array() + .storage, # type: ignore[misc, arg-type] + type=data_type.field(1).type, + ) 
+ + return pa.StructArray.from_arrays( + [ + time_columns, + component_columns, + ], + fields=list(data_type), + ) + + +def _compute_offsets(data: Iterable[Sequence[Any]]) -> list[int]: + o = 0 + return [o] + [o := o + len(d) for d in data] diff --git a/rerun_py/rerun_sdk/rerun/blueprint/views/dataframe_view.py b/rerun_py/rerun_sdk/rerun/blueprint/views/dataframe_view.py index 2887156f9e06..1ae8d946cd8f 100644 --- a/rerun_py/rerun_sdk/rerun/blueprint/views/dataframe_view.py +++ b/rerun_py/rerun_sdk/rerun/blueprint/views/dataframe_view.py @@ -33,21 +33,24 @@ class DataframeView(SpaceView): rr.init("rerun_example_dataframe", spawn=True) # Log some data. - rr.log("trig/sin", rr.SeriesLine(color=[255, 0, 0], name="sin(0.01t)"), static=True) - rr.log("trig/cos", rr.SeriesLine(color=[0, 255, 0], name="cos(0.01t)"), static=True) for t in range(0, int(math.pi * 4 * 100.0)): rr.set_time_seconds("t", t) rr.log("trig/sin", rr.Scalar(math.sin(float(t) / 100.0))) rr.log("trig/cos", rr.Scalar(math.cos(float(t) / 100.0))) + # some sparse data + if t % 5 == 0: + rr.log("trig/tan_sparse", rr.Scalar(math.tan(float(t) / 100.0))) + # Create a Dataframe View blueprint = rrb.Blueprint( rrb.DataframeView( origin="/trig", - # TODO(#6896): improve `DataframeQueryV2` API and showcase more features query=rrb.archetypes.DataframeQueryV2( timeline="t", - range_filter=rrb.components.RangeFilter(start=rr.TimeInt(seconds=0), end=rr.TimeInt(seconds=20)), + filter_by_range=(rr.TimeInt(seconds=0), rr.TimeInt(seconds=20)), + filter_by_event="/trig/tan_sparse:Scalar", + select=["t", "log_tick", "/trig/sin:Scalar", "/trig/cos:Scalar", "/trig/tan_sparse:Scalar"], ), ), ) diff --git a/rerun_py/tests/unit/test_dataframe_view_query.py b/rerun_py/tests/unit/test_dataframe_view_query.py new file mode 100644 index 000000000000..cd4dcbfd071f --- /dev/null +++ b/rerun_py/tests/unit/test_dataframe_view_query.py @@ -0,0 +1,187 @@ +from __future__ import annotations + +import pytest +import rerun as rr +import 
rerun.blueprint.components as blueprint_components +from rerun import TimeInt, datatypes +from rerun.blueprint.archetypes import DataframeQueryV2 + + +def test_component_column_selector_explicit() -> None: + selector = blueprint_components.ComponentColumnSelector(entity_path="entity/path", component="ComponentName") + + assert selector.entity_path == rr.datatypes.EntityPath("entity/path") + assert selector.component == rr.datatypes.Utf8("ComponentName") + + +def test_component_column_selector_spec() -> None: + selector = blueprint_components.ComponentColumnSelector("entity/path:ComponentName") + + assert selector.entity_path == rr.datatypes.EntityPath("entity/path") + assert selector.component == rr.datatypes.Utf8("ComponentName") + + +def test_component_column_selector_fail() -> None: + with pytest.raises(ValueError): + blueprint_components.ComponentColumnSelector(entity_path="entity/path", component=None) + + with pytest.raises(ValueError): + blueprint_components.ComponentColumnSelector(spec="entity/path:ComponentName", entity_path="entity/path") + + with pytest.raises(ValueError): + blueprint_components.ComponentColumnSelector(spec="entity/path:ComponentName", component="ComponentName") + + with pytest.raises(ValueError): + blueprint_components.ComponentColumnSelector() + + with pytest.raises(ValueError): + blueprint_components.ComponentColumnSelector(spec="hello") + + with pytest.raises(ValueError): + blueprint_components.ComponentColumnSelector(spec="hello:world:extra") + + +def test_component_column_selector_batch() -> None: + a = blueprint_components.ComponentColumnSelectorBatch(["/entity/path:ComponentName"]) + b = blueprint_components.ComponentColumnSelectorBatch( + blueprint_components.ComponentColumnSelector("/entity/path:ComponentName") + ) + c = blueprint_components.ComponentColumnSelectorBatch([ + blueprint_components.ComponentColumnSelector("/entity/path:ComponentName") + ]) + + assert a == b + assert b == c + + +def test_selected_columns() -> None: 
+ columns = blueprint_components.SelectedColumns([ + "t", + "/entity/path:ComponentName", + datatypes.Utf8("frame"), + blueprint_components.ComponentColumnSelector("/world/robot:Position3D"), + ]) + + assert columns.time_columns == [ + datatypes.Utf8("t"), + datatypes.Utf8("frame"), + ] + assert columns.component_columns == [ + blueprint_components.ComponentColumnSelector("/entity/path:ComponentName"), + blueprint_components.ComponentColumnSelector("/world/robot:Position3D"), + ] + + +def test_selected_columns_batch() -> None: + a = blueprint_components.SelectedColumnsBatch([ + [ + "t", + "/entity/path:ComponentName", + datatypes.Utf8("frame"), + blueprint_components.ComponentColumnSelector("/world/robot:Position3D"), + ] + ]) + b = blueprint_components.SelectedColumnsBatch( + blueprint_components.SelectedColumns([ + "t", + "/entity/path:ComponentName", + datatypes.Utf8("frame"), + blueprint_components.ComponentColumnSelector("/world/robot:Position3D"), + ]) + ) + + assert a == b + + +def test_selected_columns_batch_multiple() -> None: + a = blueprint_components.SelectedColumnsBatch([ + [ + "t", + ], + [ + "/entity/path:ComponentName", + ], + [ + "frame", + "/world/robot:Position3D", + ], + ]) + b = blueprint_components.SelectedColumnsBatch([ + blueprint_components.SelectedColumns([ + "t", + ]), + blueprint_components.SelectedColumns([ + "/entity/path:ComponentName", + ]), + blueprint_components.SelectedColumns([ + datatypes.Utf8("frame"), + blueprint_components.ComponentColumnSelector("/world/robot:Position3D"), + ]), + ]) + + assert a == b + + +def test_dataframe_query_property() -> None: + query = DataframeQueryV2( + timeline="frame", + filter_by_range=(TimeInt(seq=1), TimeInt(seq=10)), + filter_by_event="/entity/path:ComponentName", + apply_latest_at=True, + select=[ + "t", + "/entity/path:ComponentName", + ], + ) + + assert query.timeline == blueprint_components.TimelineNameBatch("frame") + assert query.filter_by_range == 
blueprint_components.FilterByRangeBatch( + blueprint_components.FilterByRange(rr.datatypes.TimeInt(seq=1), rr.datatypes.TimeInt(seq=10)) + ) + assert query.filter_by_event == blueprint_components.FilterByEventBatch( + blueprint_components.FilterByEvent( + active=True, + column=blueprint_components.ComponentColumnSelector(entity_path="/entity/path", component="ComponentName"), + ) + ) + + assert query.apply_latest_at == blueprint_components.ApplyLatestAtBatch(blueprint_components.ApplyLatestAt(True)) + + assert query.select == blueprint_components.SelectedColumnsBatch( + blueprint_components.SelectedColumns([ + datatypes.Utf8("t"), + blueprint_components.ComponentColumnSelector(entity_path="/entity/path", component="ComponentName"), + ]) + ) + + +def test_dataframe_query_property_explicit() -> None: + query = DataframeQueryV2( + timeline=blueprint_components.TimelineName("frame"), + filter_by_range=blueprint_components.FilterByRange(start=TimeInt(seq=1), end=TimeInt(seq=10)), + filter_by_event=blueprint_components.ComponentColumnSelector( + entity_path="/entity/path", component="ComponentName" + ), + select=[ + datatypes.Utf8("frame"), + blueprint_components.ComponentColumnSelector("/world/robot:Position3D"), + ], + ) + + assert query.timeline == blueprint_components.TimelineNameBatch("frame") + assert query.filter_by_range == blueprint_components.FilterByRangeBatch( + blueprint_components.FilterByRange(rr.datatypes.TimeInt(seq=1), rr.datatypes.TimeInt(seq=10)) + ) + assert query.filter_by_event == blueprint_components.FilterByEventBatch( + blueprint_components.FilterByEvent( + active=True, + column=blueprint_components.ComponentColumnSelector(entity_path="/entity/path", component="ComponentName"), + ) + ) + + assert query.select == blueprint_components.SelectedColumnsBatch( + blueprint_components.SelectedColumns([ + datatypes.Utf8("frame"), + blueprint_components.ComponentColumnSelector(entity_path="/world/robot", component="Position3D"), + ]) + )