Skip to content

Commit

Permalink
feat(query): add inverted index size to block meta (#15752)
Browse files (browse the repository at this point in the history)
* feat(query): add inverted index size to block meta

* fix
  • Loading branch information
b41sh authored Jun 7, 2024
1 parent af82370 commit febac7b
Show file tree
Hide file tree
Showing 12 changed files with 35 additions and 3 deletions.
1 change: 1 addition & 0 deletions src/query/service/src/test_kits/block_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ impl<'a> BlockWriter<'a> {
location,
bloom_filter_index_location,
bloom_filter_index_size,
None,
Compression::Lz4Raw,
Some(Utc::now()),
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ use std::collections::HashSet;
use std::sync::Arc;

use chrono::Utc;
// use databend_common_arrow::parquet::metadata::FileMetaData;
// use databend_common_arrow::parquet::metadata::ThriftFileMetaData;
use databend_common_base::base::tokio;
use databend_common_cache::Cache;
use databend_common_expression::types::Int32Type;
Expand All @@ -38,7 +36,6 @@ use databend_common_storages_fuse::FuseStorageFormat;
use databend_query::test_kits::*;
use databend_storages_common_cache::InMemoryCacheBuilder;
use databend_storages_common_cache::InMemoryItemCacheHolder;
// use databend_storages_common_index::BloomIndexMeta;
use databend_storages_common_table_meta::meta::BlockMeta;
use databend_storages_common_table_meta::meta::ColumnMeta;
use databend_storages_common_table_meta::meta::ColumnStatistics;
Expand Down Expand Up @@ -340,6 +337,7 @@ fn build_test_segment_info(
location: block_location,
bloom_filter_index_location: Some(location_gen.block_bloom_index_location(&block_uuid)),
bloom_filter_index_size: 0,
inverted_index_size: None,
compression: Compression::Lz4,
create_on: Some(Utc::now()),
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ async fn test_recluster_mutator_block_select() -> Result<()> {
location.clone(),
None,
0,
None,
meta::Compression::Lz4Raw,
Some(Utc::now()),
));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,7 @@ impl CompactSegmentTestFixture {
location,
None,
0,
None,
Compression::Lz4Raw,
Some(Utc::now()),
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ fn test_to_partitions() -> Result<()> {
location,
bloom_filter_location,
bloom_filter_size,
None,
meta::Compression::Lz4Raw,
Some(Utc::now()),
));
Expand Down
1 change: 1 addition & 0 deletions src/query/service/tests/it/storages/fuse/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ fn test_reduce_block_meta() -> databend_common_exception::Result<()> {
location.clone(),
None,
bloom_filter_index_size,
None,
Compression::Lz4Raw,
Some(Utc::now()),
);
Expand Down
5 changes: 5 additions & 0 deletions src/query/storages/common/table_meta/src/meta/v2/segment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ pub struct BlockMeta {

#[serde(default)]
pub bloom_filter_index_size: u64,
pub inverted_index_size: Option<u64>,
pub compression: Compression,

// block create_on
Expand All @@ -95,6 +96,7 @@ impl BlockMeta {
location: Location,
bloom_filter_index_location: Option<Location>,
bloom_filter_index_size: u64,
inverted_index_size: Option<u64>,
compression: Compression,
create_on: Option<DateTime<Utc>>,
) -> Self {
Expand All @@ -108,6 +110,7 @@ impl BlockMeta {
location,
bloom_filter_index_location,
bloom_filter_index_size,
inverted_index_size,
compression,
create_on,
}
Expand Down Expand Up @@ -255,6 +258,7 @@ impl BlockMeta {
bloom_filter_index_location: None,
bloom_filter_index_size: 0,
compression: Compression::Lz4,
inverted_index_size: None,
create_on: None,
}
}
Expand Down Expand Up @@ -287,6 +291,7 @@ impl BlockMeta {
bloom_filter_index_location: s.bloom_filter_index_location.clone(),
bloom_filter_index_size: s.bloom_filter_index_size,
compression: s.compression,
inverted_index_size: None,
create_on: None,
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ impl From<BlockMeta> for crate::meta::BlockMeta {
location: value.location,
bloom_filter_index_location: value.bloom_filter_index_location,
bloom_filter_index_size: value.bloom_filter_index_size,
inverted_index_size: None,
compression: value.compression.into(),
create_on: None,
}
Expand Down
7 changes: 7 additions & 0 deletions src/query/storages/fuse/src/io/write/block_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,12 @@ impl BlockBuilder {
&mut buffer,
)?;
let file_size = buffer.len() as u64;
let inverted_index_size = if !inverted_index_states.is_empty() {
let size = inverted_index_states.iter().map(|v| v.size).sum();
Some(size)
} else {
None
};
let block_meta = BlockMeta {
row_count,
block_size,
Expand All @@ -395,6 +401,7 @@ impl BlockBuilder {
.map(|v| v.size)
.unwrap_or_default(),
compression: self.write_settings.table_compression.into(),
inverted_index_size,
create_on: Some(Utc::now()),
};

Expand Down
1 change: 1 addition & 0 deletions src/query/storages/fuse/src/statistics/reducers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ pub fn reduce_block_metas<T: Borrow<BlockMeta>>(
uncompressed_byte_size += b.block_size;
compressed_byte_size += b.file_size;
index_size += b.bloom_filter_index_size;
index_size += b.inverted_index_size.unwrap_or_default();
if thresholds.check_large_enough(b.row_count as usize, b.block_size as usize)
|| b.cluster_stats.as_ref().is_some_and(|v| v.level != 0)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ impl<'a> FuseBlock<'a> {
let mut row_count = Vec::with_capacity(len);
let mut bloom_filter_location = vec![];
let mut bloom_filter_size = Vec::with_capacity(len);
let mut inverted_index_size = Vec::with_capacity(len);

let segments_io = SegmentsIO::create(
self.ctx.clone(),
Expand Down Expand Up @@ -143,6 +144,7 @@ impl<'a> FuseBlock<'a> {
.map(|s| s.0.clone()),
);
bloom_filter_size.push(block.bloom_filter_index_size);
inverted_index_size.push(block.inverted_index_size);

row_num += 1;
if row_num >= limit {
Expand Down Expand Up @@ -188,6 +190,10 @@ impl<'a> FuseBlock<'a> {
DataType::Number(NumberDataType::UInt64),
Value::Column(UInt64Type::from_data(bloom_filter_size)),
),
BlockEntry::new(
DataType::Nullable(Box::new(DataType::Number(NumberDataType::UInt64))),
Value::Column(UInt64Type::from_opt_data(inverted_index_size)),
),
],
row_num,
))
Expand All @@ -209,6 +215,10 @@ impl<'a> FuseBlock<'a> {
"bloom_filter_size",
TableDataType::Number(NumberDataType::UInt64),
),
TableField::new(
"inverted_index_size",
TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))),
),
])
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,11 @@ idx INVERTED t1(body)tokenizer='chinese'
idx1 INVERTED t(content)index_record='"basic"' tokenizer='chinese'
idx2 INVERTED books(title, author, description)index_record='"basic"' tokenizer='chinese'

query III
select row_count, bloom_filter_size, inverted_index_size from fuse_block('test_index', 't1')
----
10 465 3534

statement ok
use default

Expand Down

0 comments on commit febac7b

Please sign in to comment.