Skip to content

Commit

Permalink
Merge pull request #6558 from dantengsky/fix-serde-null_count
Browse files Browse the repository at this point in the history
fix: serde compatibility of null_count
  • Loading branch information
BohuTANG authored Jul 10, 2022
2 parents 0611c63 + 298b999 commit a2e9f0e
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 12 deletions.
4 changes: 2 additions & 2 deletions query/src/storages/fuse/statistics/accumulator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ impl StatisticsAccumulator {
let col_stats = ColumnStatistics {
min,
max,
unset_bits: unset_bits as u64,
null_count: unset_bits as u64,
in_memory_size,
};

Expand Down Expand Up @@ -281,7 +281,7 @@ pub fn columns_statistics(data_block: &DataBlock) -> Result<StatisticsOfColumns>
let col_stats = ColumnStatistics {
min,
max,
unset_bits: unset_bits as u64,
null_count: unset_bits as u64,
in_memory_size,
};

Expand Down
6 changes: 3 additions & 3 deletions query/src/storages/fuse/statistics/reducers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ pub fn reduce_block_statistics<T: Borrow<StatisticsOfColumns>>(
.try_fold(HashMap::with_capacity(len), |mut acc, (id, stats)| {
let mut min_stats = Vec::with_capacity(stats.len());
let mut max_stats = Vec::with_capacity(stats.len());
let mut unset_bits = 0;
let mut null_count = 0;
let mut in_memory_size = 0;

for col_stats in stats {
min_stats.push(col_stats.min.clone());
max_stats.push(col_stats.max.clone());

unset_bits += col_stats.unset_bits;
null_count += col_stats.null_count;
in_memory_size += col_stats.in_memory_size;
}

Expand Down Expand Up @@ -89,7 +89,7 @@ pub fn reduce_block_statistics<T: Borrow<StatisticsOfColumns>>(
acc.insert(*id, ColumnStatistics {
min,
max,
unset_bits,
null_count,
in_memory_size,
});
Ok(acc)
Expand Down
10 changes: 7 additions & 3 deletions query/src/storages/index/range_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,15 @@ use crate::sessions::QueryContext;

pub type StatisticsOfColumns = HashMap<u32, ColumnStatistics>;

#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Eq, PartialEq)]
pub struct ColumnStatistics {
pub min: DataValue,
pub max: DataValue,
pub unset_bits: u64,
// A non-backward compatible change has been introduced by [PR#6067](https://github.com/datafuselabs/databend/pull/6067/files#diff-20030750809780d6492d2fe215a8eb80294aa6a8a5af2cf1bebe17eb740cae35)
// , please also see [issue#6556](https://github.com/datafuselabs/databend/issues/6556)
// therefore, we alias `null_count` with `unset_bits`, to make subsequent versions backward compatible again
#[serde(alias = "unset_bits")]
pub null_count: u64,
pub in_memory_size: u64,
}

Expand Down Expand Up @@ -238,7 +242,7 @@ impl StatColumn {
k
))
})?;
return Ok(Some(Series::from_data(vec![stat.unset_bits])));
return Ok(Some(Series::from_data(vec![stat.null_count])));
}

let mut single_point = true;
Expand Down
47 changes: 47 additions & 0 deletions query/tests/it/storages/fuse/misc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2022 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_datavalues::DataValue;
use databend_query::storages::index::ColumnStatistics;
use serde_json::Value;

// A non-backward compatible change has been introduced by [PR#6067](https://github.com/datafuselabs/databend/pull/6067/files#diff-20030750809780d6492d2fe215a8eb80294aa6a8a5af2cf1bebe17eb740cae35)
// , please also see [issue#6556](https://github.com/datafuselabs/databend/issues/6556)
// therefore, we alias `null_count` with `unset_bits`, to make subsequent versions backward compatible again
#[test]
fn test_issue_6556_column_statistics_ser_de_compatability_null_count_alias(
) -> common_exception::Result<()> {
let col_stats = ColumnStatistics {
min: DataValue::Null,
max: DataValue::Null,
null_count: 0,
in_memory_size: 0,
};

let mut json_value = serde_json::to_value(&col_stats)?;

// replace "field" null_count with "unset_bits"
if let Value::Object(ref mut object) = json_value {
let unset_bits = object.remove("null_count").unwrap();
object.insert("unset_bits".to_owned(), unset_bits);
} else {
panic!("should return json object");
}

// test that we can still de-ser it
let new_json = json_value.to_string();
let new_col_stats = serde_json::from_str(&new_json)?;
assert_eq!(col_stats, new_col_stats);
Ok(())
}
1 change: 1 addition & 0 deletions query/tests/it/storages/fuse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

mod io;
mod meta;
mod misc;
mod operations;
mod pruning;
mod statistics;
Expand Down
2 changes: 1 addition & 1 deletion query/tests/it/storages/fuse/operations/read_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ fn test_to_partitions() -> Result<()> {
let col_stats_gen = |col_size| ColumnStatistics {
min: DataValue::Int64(1),
max: DataValue::Int64(2),
unset_bits: 0,
null_count: 0,
in_memory_size: col_size as u64,
};

Expand Down
6 changes: 3 additions & 3 deletions query/tests/it/storages/index/range_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,19 @@ async fn test_range_filter() -> Result<()> {
stats.insert(0u32, ColumnStatistics {
min: DataValue::Int64(1),
max: DataValue::Int64(20),
unset_bits: 1,
null_count: 1,
in_memory_size: 0,
});
stats.insert(1u32, ColumnStatistics {
min: DataValue::Int64(3),
max: DataValue::Int64(10),
unset_bits: 0,
null_count: 0,
in_memory_size: 0,
});
stats.insert(2u32, ColumnStatistics {
min: DataValue::String("abc".as_bytes().to_vec()),
max: DataValue::String("bcd".as_bytes().to_vec()),
unset_bits: 0,
null_count: 0,
in_memory_size: 0,
});

Expand Down

1 comment on commit a2e9f0e

@vercel
Copy link

@vercel vercel bot commented on a2e9f0e Jul 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

databend – ./

databend-databend.vercel.app
databend-git-main-databend.vercel.app
databend.rs
databend.vercel.app

Please sign in to comment.