Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: serde compatibility of null_count #6558

Merged
merged 1 commit into from
Jul 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions query/src/storages/fuse/statistics/accumulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ impl StatisticsAccumulator {
let col_stats = ColumnStatistics {
min,
max,
unset_bits: unset_bits as u64,
null_count: unset_bits as u64,
in_memory_size,
};

Expand Down Expand Up @@ -281,7 +281,7 @@ pub fn columns_statistics(data_block: &DataBlock) -> Result<StatisticsOfColumns>
let col_stats = ColumnStatistics {
min,
max,
unset_bits: unset_bits as u64,
null_count: unset_bits as u64,
in_memory_size,
};

Expand Down
6 changes: 3 additions & 3 deletions query/src/storages/fuse/statistics/reducers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ pub fn reduce_block_statistics<T: Borrow<StatisticsOfColumns>>(
.try_fold(HashMap::with_capacity(len), |mut acc, (id, stats)| {
let mut min_stats = Vec::with_capacity(stats.len());
let mut max_stats = Vec::with_capacity(stats.len());
let mut unset_bits = 0;
let mut null_count = 0;
let mut in_memory_size = 0;

for col_stats in stats {
min_stats.push(col_stats.min.clone());
max_stats.push(col_stats.max.clone());

unset_bits += col_stats.unset_bits;
null_count += col_stats.null_count;
in_memory_size += col_stats.in_memory_size;
}

Expand Down Expand Up @@ -89,7 +89,7 @@ pub fn reduce_block_statistics<T: Borrow<StatisticsOfColumns>>(
acc.insert(*id, ColumnStatistics {
min,
max,
unset_bits,
null_count,
in_memory_size,
});
Ok(acc)
Expand Down
10 changes: 7 additions & 3 deletions query/src/storages/index/range_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@ use crate::sessions::QueryContext;

pub type StatisticsOfColumns = HashMap<u32, ColumnStatistics>;

#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct ColumnStatistics {
pub min: DataValue,
pub max: DataValue,
pub unset_bits: u64,
// A non-backward compatible change has been introduced by [PR#6067](https://github.com/datafuselabs/databend/pull/6067/files#diff-20030750809780d6492d2fe215a8eb80294aa6a8a5af2cf1bebe17eb740cae35)
// , please also see [issue#6556](https://github.com/datafuselabs/databend/issues/6556)
// therefore, we alias `null_count` with `unset_bits`, to make subsequent versions backward compatible again
#[serde(alias = "unset_bits")]
pub null_count: u64,
pub in_memory_size: u64,
}

Expand Down Expand Up @@ -238,7 +242,7 @@ impl StatColumn {
k
))
})?;
return Ok(Some(Series::from_data(vec![stat.unset_bits])));
return Ok(Some(Series::from_data(vec![stat.null_count])));
}

let mut single_point = true;
Expand Down
47 changes: 47 additions & 0 deletions query/tests/it/storages/fuse/misc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2022 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_datavalues::DataValue;
use databend_query::storages::index::ColumnStatistics;
use serde_json::Value;

// A non-backward compatible change has been introduced by [PR#6067](https://github.com/datafuselabs/databend/pull/6067/files#diff-20030750809780d6492d2fe215a8eb80294aa6a8a5af2cf1bebe17eb740cae35)
// , please also see [issue#6556](https://github.com/datafuselabs/databend/issues/6556)
// therefore, we alias `null_count` with `unset_bits`, to make subsequent versions backward compatible again
#[test]
fn test_issue_6556_column_statistics_ser_de_compatability_null_count_alias(
) -> common_exception::Result<()> {
let col_stats = ColumnStatistics {
min: DataValue::Null,
max: DataValue::Null,
null_count: 0,
in_memory_size: 0,
};

let mut json_value = serde_json::to_value(&col_stats)?;

// replace "field" null_count with "unset_bits"
if let Value::Object(ref mut object) = json_value {
let unset_bits = object.remove("null_count").unwrap();
object.insert("unset_bits".to_owned(), unset_bits);
} else {
panic!("should return json object");
}

// test that we can still de-ser it
let new_json = json_value.to_string();
let new_col_stats = serde_json::from_str(&new_json)?;
assert_eq!(col_stats, new_col_stats);
Ok(())
}
1 change: 1 addition & 0 deletions query/tests/it/storages/fuse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

mod io;
mod meta;
mod misc;
mod operations;
mod pruning;
mod statistics;
Expand Down
2 changes: 1 addition & 1 deletion query/tests/it/storages/fuse/operations/read_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ fn test_to_partitions() -> Result<()> {
let col_stats_gen = |col_size| ColumnStatistics {
min: DataValue::Int64(1),
max: DataValue::Int64(2),
unset_bits: 0,
null_count: 0,
in_memory_size: col_size as u64,
};

Expand Down
6 changes: 3 additions & 3 deletions query/tests/it/storages/index/range_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,19 @@ async fn test_range_filter() -> Result<()> {
stats.insert(0u32, ColumnStatistics {
min: DataValue::Int64(1),
max: DataValue::Int64(20),
unset_bits: 1,
null_count: 1,
in_memory_size: 0,
});
stats.insert(1u32, ColumnStatistics {
min: DataValue::Int64(3),
max: DataValue::Int64(10),
unset_bits: 0,
null_count: 0,
in_memory_size: 0,
});
stats.insert(2u32, ColumnStatistics {
min: DataValue::String("abc".as_bytes().to_vec()),
max: DataValue::String("bcd".as_bytes().to_vec()),
unset_bits: 0,
null_count: 0,
in_memory_size: 0,
});

Expand Down