Skip to content

Commit

Permalink
chore(12123): add docs to ParquetFormat to clarify exactly how the vi…
Browse files Browse the repository at this point in the history
…ew types are used
  • Loading branch information
wiedld committed Sep 9, 2024
1 parent 262171e commit f7695b8
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions datafusion/core/src/datasource/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,23 @@ impl ParquetFormat {
}

/// Return `true` if view types should be used.
pub fn should_use_view_types(&self) -> bool {
///
/// If this returns true, DataFusion will instruct the parquet reader
/// to read string / binary columns as the view types `StringView` or `BinaryView`
/// if the table schema specifies those types, regardless of any embedded metadata
/// that may specify an alternate Arrow type. The parquet reader is optimized
/// for reading `StringView` and `BinaryView` and such queries are significantly faster.
///
/// If this returns false, the parquet reader will read the columns according to the
/// defaults or any embedded Arrow type information. This may result in reading
/// `StringArray`s and then casting them to `StringViewArray`, which is less efficient.
pub fn force_view_types(&self) -> bool {
    // The flag is stored on the `global` section of this format's options.
    let global_options = &self.options.global;
    global_options.schema_force_view_types
}

/// If true, will use view types (`StringView` and `BinaryView`).
///
/// Refer to [`Self::force_view_types`].
pub fn with_force_view_types(mut self, use_views: bool) -> Self {
self.options.global.schema_force_view_types = use_views;
self
Expand Down Expand Up @@ -331,7 +343,7 @@ impl FileFormat for ParquetFormat {
Schema::try_merge(schemas)
}?;

let schema = if self.should_use_view_types() {
let schema = if self.force_view_types() {
transform_schema_to_view(&schema)
} else {
schema
Expand Down

0 comments on commit f7695b8

Please sign in to comment.