Skip to content

Commit

Permalink
fix(query): fix column leaf_index (#16537)
Browse files: browse the repository at this point in the history
* chore(query): fix column leaf_index

* chore(test): update sqllogictest

* chore(binder): refine code

* chore(binder): fix tuple inner_field

* chore(binder): fix tuple inner column_id

* chore(test): update test

* chore(binder): fix TUPLE inner_column_id

* chore(test): update sqllogictest

* chore(test): update sqllogictest
  • Loading branch information
Dousir9 authored Sep 28, 2024
1 parent 3d11bd9 commit 0189b01
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/query/sql/src/planner/expression_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ pub fn parse_computed_expr_to_string(
field.data_type().clone(),
0,
None,
None,
Some(field.column_id),
None,
None,
);
Expand Down
34 changes: 17 additions & 17 deletions src/query/sql/src/planner/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ impl Metadata {
data_type: TableDataType,
table_index: IndexType,
path_indices: Option<Vec<IndexType>>,
leaf_index: Option<IndexType>,
column_id: Option<u32>,
column_position: Option<usize>,
virtual_computed_expr: Option<String>,
) -> IndexType {
Expand All @@ -241,7 +241,7 @@ impl Metadata {
column_index,
table_index,
path_indices,
leaf_index,
column_id,
virtual_computed_expr,
});
self.columns.push(column_entry);
Expand Down Expand Up @@ -370,16 +370,14 @@ impl Metadata {
}
}

// build leaf index in DFS order for primitive columns.
let mut leaf_index = 0;
while let Some((indices, field)) = fields.pop_front() {
if indices.is_empty() {
self.add_base_table_column(
field.name().clone(),
field.data_type().clone(),
table_index,
None,
None,
Some(field.column_id),
None,
Some(field.computed_expr().unwrap().expr().clone()),
);
Expand All @@ -402,25 +400,29 @@ impl Metadata {
field.data_type().clone(),
table_index,
path_indices,
None,
Some(field.column_id),
None,
None,
);

let mut i = fields_type.len();
for (inner_field_name, inner_field_type) in
fields_name.iter().zip(fields_type.iter()).rev()
let mut inner_column_id = field.column_id;
for (index, (inner_field_name, inner_field_type)) in
fields_name.iter().zip(fields_type.iter()).enumerate()
{
i -= 1;
let mut inner_indices = indices.clone();
inner_indices.push(i);
inner_indices.push(index);
// create tuple inner field
let inner_name = format!(
"{}:{}",
field.name(),
display_tuple_field_name(inner_field_name)
);
let inner_field = TableField::new(&inner_name, inner_field_type.clone());
let inner_field = TableField::new_from_column_id(
&inner_name,
inner_field_type.clone(),
inner_column_id,
);
inner_column_id += inner_field_type.num_leaf_columns() as u32;
fields.push_front((inner_indices, inner_field));
}
} else {
Expand All @@ -429,11 +431,10 @@ impl Metadata {
field.data_type().clone(),
table_index,
path_indices,
Some(leaf_index),
Some(field.column_id),
Some(indices[0] + 1),
None,
);
leaf_index += 1;
}
}

Expand Down Expand Up @@ -573,9 +574,8 @@ pub struct BaseTableColumn {

/// Path indices for inner column of struct data type.
pub path_indices: Option<Vec<usize>>,
/// Leaf index is the primitive column index in Parquet, constructed in DFS order.
/// None if the data type of column is struct.
pub leaf_index: Option<usize>,
/// The column id in table schema.
pub column_id: Option<u32>,
/// Virtual computed expression, generated in query.
pub virtual_computed_expr: Option<String>,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,18 @@ impl CollectStatisticsOptimizer {
for column in columns.iter() {
if let ColumnEntry::BaseTableColumn(BaseTableColumn {
column_index,
path_indices,
leaf_index,
column_id,
virtual_computed_expr,
..
}) = column
{
if path_indices.is_none() && virtual_computed_expr.is_none() {
if let Some(col_id) = *leaf_index {
if virtual_computed_expr.is_none() {
if let Some(column_id) = *column_id {
let col_stat = column_statistics_provider
.column_statistics(col_id as ColumnId);
.column_statistics(column_id as ColumnId);
column_stats.insert(*column_index, col_stat.cloned());
let histogram =
column_statistics_provider.histogram(col_id as ColumnId);
column_statistics_provider.histogram(column_id as ColumnId);
histograms.insert(*column_index, histogram);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,27 @@ DROP TABLE IF EXISTS t;

statement ok
DROP TABLE IF EXISTS t1;

statement ok
CREATE OR REPLACE TABLE t1 (id VARCHAR NULL, size VARCHAR NULL, create_time int NULL, path VARCHAR NULL);

statement ok
CREATE OR REPLACE TABLE t1_random like t1 Engine = Random;

statement ok
INSERT INTO t1 SELECT * FROM t1_random LIMIT 10;

statement ok
ALTER TABLE t1 DROP COLUMN size;

statement ok
ALTER TABLE t1 DROP COLUMN path;

statement ok
SELECT * FROM t1 WHERE create_time > 111113;

statement ok
DROP TABLE IF EXISTS t1;

statement ok
DROP TABLE IF EXISTS t1_random;
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,11 @@ explain select 1 from bloom_test_t where c2=3;
EvalScalar
├── output columns: [1 (#3)]
├── expressions: [1]
├── estimated rows: 1.00
├── estimated rows: 2.67
└── Filter
├── output columns: []
├── filters: [is_true(bloom_test_t.c2 (#1) = 3)]
├── estimated rows: 1.00
├── estimated rows: 2.67
└── TableScan
├── table: default.default.bloom_test_t
├── output columns: [c2 (#1)]
Expand All @@ -150,11 +150,11 @@ explain select 1 from bloom_test_t where c3=12345;
EvalScalar
├── output columns: [1 (#3)]
├── expressions: [1]
├── estimated rows: 0.00
├── estimated rows: 1.00
└── Filter
├── output columns: []
├── filters: [is_true(bloom_test_t.c3 (#2) = 12345)]
├── estimated rows: 0.00
├── estimated rows: 1.00
└── TableScan
├── table: default.default.bloom_test_t
├── output columns: [c3 (#2)]
Expand Down
64 changes: 64 additions & 0 deletions tests/sqllogictests/suites/mode/standalone/explain/explain.test
Original file line number Diff line number Diff line change
Expand Up @@ -1763,3 +1763,67 @@ INSERT OVERWRITE ALL INTO cat.db1.t1 VALUES (order_id, 'PriorityHandling') INTO

statement ok
drop table orders_placed

# Test Tuple Statistics
statement ok
CREATE OR REPLACE TABLE t(a TUPLE(INT, INT));

statement ok
INSERT INTO t VALUES((1, 2)), ((3, 4));

query T
EXPLAIN SELECT * FROM t WHERE a.1 > 0;
----
Filter
├── output columns: [t.a (#0)]
├── filters: [is_true(t.a:"1" (#2) > 0)]
├── estimated rows: 2.00
└── TableScan
├── table: default.default.t
├── output columns: [a (#0), a:"1" (#2)]
├── read rows: 2
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [is_true(t.a:"1" (#2) > 0)], limit: NONE]
└── estimated rows: 2.00

query T
EXPLAIN SELECT * FROM t WHERE a.1 > 1;
----
Filter
├── output columns: [t.a (#0)]
├── filters: [is_true(t.a:"1" (#2) > 1)]
├── estimated rows: 1.00
└── TableScan
├── table: default.default.t
├── output columns: [a (#0), a:"1" (#2)]
├── read rows: 2
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [is_true(t.a:"1" (#2) > 1)], limit: NONE]
└── estimated rows: 2.00

query T
EXPLAIN SELECT * FROM t WHERE a.2 > 1;
----
Filter
├── output columns: [t.a (#0)]
├── filters: [is_true(t.a:"2" (#1) > 1)]
├── estimated rows: 2.00
└── TableScan
├── table: default.default.t
├── output columns: [a (#0), a:"2" (#1)]
├── read rows: 2
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [is_true(t.a:"2" (#1) > 1)], limit: NONE]
└── estimated rows: 2.00

statement ok
DROP TABLE IF EXISTS t;
Original file line number Diff line number Diff line change
Expand Up @@ -117,16 +117,16 @@ explain select * from t_where_optimizer where s:a > 0
----
Filter
├── output columns: [t_where_optimizer.id (#0), t_where_optimizer.s (#1)]
├── filters: [t_where_optimizer.s:a (#2) > 0]
├── filters: [t_where_optimizer.s:a (#3) > 0]
├── estimated rows: 0.00
└── TableScan
├── table: default.default.t_where_optimizer
├── output columns: [id (#0), s (#1), s:a (#2)]
├── output columns: [id (#0), s (#1), s:a (#3)]
├── read rows: 0
├── read size: 0
├── partitions total: 0
├── partitions scanned: 0
├── push downs: [filters: [t_where_optimizer.s:a (#2) > 0], limit: NONE]
├── push downs: [filters: [t_where_optimizer.s:a (#3) > 0], limit: NONE]
└── estimated rows: 0.00

statement ok
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ TableScan
├── partitions scanned: 0
├── pruning stats: [segments: <range pruning: 3 to 3>, blocks: <range pruning: 3 to 1, bloom pruning: 1 to 0>]
├── push downs: [filters: [is_true(bloom_test_t.c2 (#1) = 3)], limit: NONE]
└── estimated rows: 1.00
└── estimated rows: 2.67

statement ok
drop table bloom_test_t
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -996,3 +996,55 @@ drop table t2;

statement ok
drop table t3;

# Test Tuple Statistics
statement ok
CREATE OR REPLACE TABLE t(a TUPLE(INT, INT));

statement ok
INSERT INTO t VALUES((1, 2)), ((3, 4));

query T
EXPLAIN SELECT * FROM t WHERE a.1 > 0;
----
TableScan
├── table: default.default.t
├── output columns: [a (#0)]
├── read rows: 2
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [is_true(t.a:"1" (#2) > 0)], limit: NONE]
└── estimated rows: 2.00

query T
EXPLAIN SELECT * FROM t WHERE a.1 > 1;
----
TableScan
├── table: default.default.t
├── output columns: [a (#0)]
├── read rows: 2
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [is_true(t.a:"1" (#2) > 1)], limit: NONE]
└── estimated rows: 1.00

query T
EXPLAIN SELECT * FROM t WHERE a.2 > 1;
----
TableScan
├── table: default.default.t
├── output columns: [a (#0)]
├── read rows: 2
├── read size: < 1 KiB
├── partitions total: 1
├── partitions scanned: 1
├── pruning stats: [segments: <range pruning: 1 to 1>, blocks: <range pruning: 1 to 1>]
├── push downs: [filters: [is_true(t.a:"2" (#1) > 1)], limit: NONE]
└── estimated rows: 2.00

statement ok
DROP TABLE IF EXISTS t;
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ TableScan
├── read size: 0
├── partitions total: 0
├── partitions scanned: 0
├── push downs: [filters: [t_where_optimizer.s:a (#2) > 0], limit: NONE]
├── push downs: [filters: [t_where_optimizer.s:a (#3) > 0], limit: NONE]
└── estimated rows: 0.00

statement ok
Expand Down

0 comments on commit 0189b01

Please sign in to comment.