Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(query): add inverted index size to block meta #15752

Merged
3 commits merged on Jun 7, 2024
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/query/service/src/test_kits/block_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ impl<'a> BlockWriter<'a> {
location,
bloom_filter_index_location,
bloom_filter_index_size,
None,
Compression::Lz4Raw,
Some(Utc::now()),
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ use std::collections::HashSet;
use std::sync::Arc;

use chrono::Utc;
// use databend_common_arrow::parquet::metadata::FileMetaData;
// use databend_common_arrow::parquet::metadata::ThriftFileMetaData;
use databend_common_base::base::tokio;
use databend_common_cache::Cache;
use databend_common_expression::types::Int32Type;
Expand All @@ -38,7 +36,6 @@ use databend_common_storages_fuse::FuseStorageFormat;
use databend_query::test_kits::*;
use databend_storages_common_cache::InMemoryCacheBuilder;
use databend_storages_common_cache::InMemoryItemCacheHolder;
// use databend_storages_common_index::BloomIndexMeta;
use databend_storages_common_table_meta::meta::BlockMeta;
use databend_storages_common_table_meta::meta::ColumnMeta;
use databend_storages_common_table_meta::meta::ColumnStatistics;
Expand Down Expand Up @@ -340,6 +337,7 @@ fn build_test_segment_info(
location: block_location,
bloom_filter_index_location: Some(location_gen.block_bloom_index_location(&block_uuid)),
bloom_filter_index_size: 0,
inverted_index_size: None,
compression: Compression::Lz4,
create_on: Some(Utc::now()),
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ async fn test_recluster_mutator_block_select() -> Result<()> {
location.clone(),
None,
0,
None,
meta::Compression::Lz4Raw,
Some(Utc::now()),
));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,7 @@ impl CompactSegmentTestFixture {
location,
None,
0,
None,
Compression::Lz4Raw,
Some(Utc::now()),
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ fn test_to_partitions() -> Result<()> {
location,
bloom_filter_location,
bloom_filter_size,
None,
meta::Compression::Lz4Raw,
Some(Utc::now()),
));
Expand Down
1 change: 1 addition & 0 deletions src/query/service/tests/it/storages/fuse/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,7 @@ fn test_reduce_block_meta() -> databend_common_exception::Result<()> {
location.clone(),
None,
bloom_filter_index_size,
None,
Compression::Lz4Raw,
Some(Utc::now()),
);
Expand Down
5 changes: 5 additions & 0 deletions src/query/storages/common/table_meta/src/meta/v2/segment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ pub struct BlockMeta {

#[serde(default)]
pub bloom_filter_index_size: u64,
pub inverted_index_size: Option<u64>,
pub compression: Compression,

// block create_on
Expand All @@ -95,6 +96,7 @@ impl BlockMeta {
location: Location,
bloom_filter_index_location: Option<Location>,
bloom_filter_index_size: u64,
inverted_index_size: Option<u64>,
compression: Compression,
create_on: Option<DateTime<Utc>>,
) -> Self {
Expand All @@ -108,6 +110,7 @@ impl BlockMeta {
location,
bloom_filter_index_location,
bloom_filter_index_size,
inverted_index_size,
compression,
create_on,
}
Expand Down Expand Up @@ -255,6 +258,7 @@ impl BlockMeta {
bloom_filter_index_location: None,
bloom_filter_index_size: 0,
compression: Compression::Lz4,
inverted_index_size: None,
create_on: None,
}
}
Expand Down Expand Up @@ -287,6 +291,7 @@ impl BlockMeta {
bloom_filter_index_location: s.bloom_filter_index_location.clone(),
bloom_filter_index_size: s.bloom_filter_index_size,
compression: s.compression,
inverted_index_size: None,
create_on: None,
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ impl From<BlockMeta> for crate::meta::BlockMeta {
location: value.location,
bloom_filter_index_location: value.bloom_filter_index_location,
bloom_filter_index_size: value.bloom_filter_index_size,
inverted_index_size: None,
compression: value.compression.into(),
create_on: None,
}
Expand Down
7 changes: 7 additions & 0 deletions src/query/storages/fuse/src/io/write/block_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,12 @@ impl BlockBuilder {
&mut buffer,
)?;
let file_size = buffer.len() as u64;
let inverted_index_size = if !inverted_index_states.is_empty() {
let size = inverted_index_states.iter().map(|v| v.size).sum();
Some(size)
} else {
None
};
let block_meta = BlockMeta {
row_count,
block_size,
Expand All @@ -324,6 +330,7 @@ impl BlockBuilder {
.map(|v| v.size)
.unwrap_or_default(),
compression: self.write_settings.table_compression.into(),
inverted_index_size,
create_on: Some(Utc::now()),
};

Expand Down
1 change: 1 addition & 0 deletions src/query/storages/fuse/src/statistics/reducers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ pub fn reduce_block_metas<T: Borrow<BlockMeta>>(
uncompressed_byte_size += b.block_size;
compressed_byte_size += b.file_size;
index_size += b.bloom_filter_index_size;
index_size += b.inverted_index_size.unwrap_or_default();
if thresholds.check_large_enough(b.row_count as usize, b.block_size as usize)
|| b.cluster_stats.as_ref().is_some_and(|v| v.level != 0)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ impl<'a> FuseBlock<'a> {
let mut row_count = Vec::with_capacity(len);
let mut bloom_filter_location = vec![];
let mut bloom_filter_size = Vec::with_capacity(len);
let mut inverted_index_size = Vec::with_capacity(len);

let segments_io = SegmentsIO::create(
self.ctx.clone(),
Expand Down Expand Up @@ -143,6 +144,7 @@ impl<'a> FuseBlock<'a> {
.map(|s| s.0.clone()),
);
bloom_filter_size.push(block.bloom_filter_index_size);
inverted_index_size.push(block.inverted_index_size);

row_num += 1;
if row_num >= limit {
Expand Down Expand Up @@ -188,6 +190,10 @@ impl<'a> FuseBlock<'a> {
DataType::Number(NumberDataType::UInt64),
Value::Column(UInt64Type::from_data(bloom_filter_size)),
),
BlockEntry::new(
DataType::Nullable(Box::new(DataType::Number(NumberDataType::UInt64))),
Value::Column(UInt64Type::from_opt_data(inverted_index_size)),
),
],
row_num,
))
Expand All @@ -209,6 +215,10 @@ impl<'a> FuseBlock<'a> {
"bloom_filter_size",
TableDataType::Number(NumberDataType::UInt64),
),
TableField::new(
"inverted_index_size",
TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::UInt64))),
),
])
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,11 @@ idx INVERTED t1(body)tokenizer='chinese'
idx1 INVERTED t(content)index_record='"basic"' tokenizer='chinese'
idx2 INVERTED books(title, author, description)index_record='"basic"' tokenizer='chinese'

query III
select row_count, bloom_filter_size, inverted_index_size from fuse_block('test_index', 't1')
----
10 465 3534

statement ok
use default

Expand Down
Loading