Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(query): show indexes add inverted indexes #15733

Merged
merged 13 commits into from
Jun 6, 2024
1 change: 1 addition & 0 deletions src/common/exception/src/exception_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ build_exceptions! {
VirtualColumnNotFound(1115),
VirtualColumnAlreadyExists(1116),
ColumnReferencedByComputedColumn(1117),
ColumnReferencedByInvertedIndex(1118),
// The table is not a clustered table.
UnclusteredTable(1118),
UnknownCatalog(1119),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ enum ObjectId {
// some statements like `SELECT 1`, `SHOW USERS`, `SHOW ROLES`, `SHOW TABLES` will be
// rewritten to the queries on the system tables, we need to skip the privilege check on
// these tables.
const SYSTEM_TABLES_ALLOW_LIST: [&str; 18] = [
const SYSTEM_TABLES_ALLOW_LIST: [&str; 19] = [
"catalogs",
"columns",
"databases",
Expand All @@ -72,6 +72,7 @@ const SYSTEM_TABLES_ALLOW_LIST: [&str; 18] = [
"processes",
"user_functions",
"functions",
"indexes",
];

impl PrivilegeAccess {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,10 @@ impl Interpreter for DropTableColumnInterpreter {
)));
}

let mut schema: DataSchema = table_info.schema().into();
let field = schema.field_with_name(self.plan.column.as_str())?;
let table_schema = table_info.schema();
let field = table_schema.field_with_name(self.plan.column.as_str())?;
if field.computed_expr().is_none() {
let mut schema: DataSchema = table_info.schema().into();
schema.drop_column(self.plan.column.as_str())?;
// Check if this column is referenced by computed columns.
check_referenced_computed_columns(
Expand All @@ -97,6 +98,17 @@ impl Interpreter for DropTableColumnInterpreter {
self.plan.column.as_str(),
)?;
}
// If the column is inverted index column, the column can't be dropped.
if !table_info.meta.indexes.is_empty() {
for (index_name, index) in &table_info.meta.indexes {
if index.column_ids.contains(&field.column_id) {
return Err(ErrorCode::ColumnReferencedByInvertedIndex(format!(
"column `{}` is referenced by inverted index, drop inverted index `{}` first",
field.name, index_name,
)));
}
}
}

let catalog = self.ctx.get_catalog(catalog_name).await?;
let mut new_table_meta = table.get_table_info().meta.clone();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ impl ModifyTableColumnInterpreter {
for (field, comment) in field_and_comments {
let column = &field.name.to_string();
let data_type = &field.data_type;
if let Some((i, _)) = schema.column_with_name(column) {
if let Some((i, old_field)) = schema.column_with_name(column) {
if data_type != &new_schema.fields[i].data_type {
// Check if this column is referenced by computed columns.
let mut data_schema: DataSchema = table_info.schema().into();
Expand All @@ -204,6 +204,20 @@ impl ModifyTableColumnInterpreter {
data_type
)));
}
// If the column is inverted index column, the type can't be changed.
if !table_info.meta.indexes.is_empty() {
for (index_name, index) in &table_info.meta.indexes {
if index.column_ids.contains(&old_field.column_id)
&& old_field.data_type.remove_nullable()
!= field.data_type.remove_nullable()
{
return Err(ErrorCode::ColumnReferencedByInvertedIndex(format!(
"column `{}` is referenced by inverted index, drop inverted index `{}` first",
column, index_name,
)));
}
}
}
new_schema.fields[i].data_type = data_type.clone();
}
if table_info.meta.field_comments[i] != *comment {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ async fn test_show_databases() -> Result<()> {
let _fixture = TestFixture::setup().await?;

let sql = "show databases";
let (status, result) = post_sql(sql, 1).await?;
let (status, result) = post_sql(sql, 3).await?;
assert_eq!(status, StatusCode::OK, "{:?}", result);
assert!(result.error.is_none(), "{:?}", result);
// has only one field: name
Expand Down Expand Up @@ -564,7 +564,7 @@ async fn test_pagination() -> Result<()> {

let ep = create_endpoint().await?;
let sql = "select * from numbers(10)";
let json = serde_json::json!({"sql": sql.to_string(), "pagination": {"wait_time_secs": 1, "max_rows_per_page": 2}, "session": { "settings": {}}});
let json = serde_json::json!({"sql": sql.to_string(), "pagination": {"wait_time_secs": 3, "max_rows_per_page": 2}, "session": { "settings": {}}});

let (status, result) = post_json_to_endpoint(&ep, &json, HeaderMap::default()).await?;
assert_eq!(status, StatusCode::OK, "{:?}", result);
Expand Down Expand Up @@ -693,7 +693,7 @@ async fn test_system_tables() -> Result<()> {

let sql = "select name from system.tables where database='system' order by name";

let (status, result) = post_sql_to_endpoint(&ep, sql, 1).await?;
let (status, result) = post_sql_to_endpoint(&ep, sql, 3).await?;
assert_eq!(status, StatusCode::OK, "{:?}", result);
assert!(!result.data.is_empty(), "{:?}", result);

Expand Down
109 changes: 103 additions & 6 deletions src/query/storages/system/src/indexes_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use databend_common_meta_app::schema::TableIdent;
use databend_common_meta_app::schema::TableInfo;
use databend_common_meta_app::schema::TableMeta;
use databend_common_storages_fuse::TableContext;
use log::warn;

use crate::table::AsyncOneBlockSystemTable;
use crate::table::AsyncSystemTable;
Expand Down Expand Up @@ -57,12 +58,15 @@ impl AsyncSystemTable for IndexesTable {
.list_indexes(ListIndexesReq::new(&tenant, None))
.await?;

let mut names = Vec::with_capacity(indexes.len());
let mut types = Vec::with_capacity(indexes.len());
let mut originals = Vec::with_capacity(indexes.len());
let mut defs = Vec::with_capacity(indexes.len());
let mut created_on = Vec::with_capacity(indexes.len());
let mut updated_on = Vec::with_capacity(indexes.len());
let inverted_index_tables = self.list_inverted_index_tables(ctx.clone()).await?;

let len = indexes.len() + inverted_index_tables.len();
let mut names = Vec::with_capacity(len);
let mut types = Vec::with_capacity(len);
let mut originals = Vec::with_capacity(len);
let mut defs = Vec::with_capacity(len);
let mut created_on = Vec::with_capacity(len);
let mut updated_on = Vec::with_capacity(len);

for (_, name, index) in indexes {
names.push(name.clone());
Expand All @@ -73,6 +77,37 @@ impl AsyncSystemTable for IndexesTable {
updated_on.push(index.updated_on.map(|u| u.timestamp_micros()));
}

for table in inverted_index_tables {
for (name, index) in &table.meta.indexes {
names.push(name.clone());
types.push("INVERTED".to_string());
originals.push("".to_string());

let schema = table.schema();
let columns = index
.column_ids
.iter()
.map(|id| {
let field = schema.field_of_column_id(*id).unwrap();
field.name.clone()
})
.collect::<Vec<_>>()
.join(", ");

let options = index
.options
.iter()
.map(|(key, val)| format!("{}='{}'", key, val))
.collect::<Vec<_>>()
.join(" ");

let def = format!("{}({}){}", table.name, columns, options);
defs.push(def);
created_on.push(table.meta.created_on.timestamp_micros());
updated_on.push(None);
}
}

Ok(DataBlock::new_from_columns(vec![
StringType::from_data(names),
StringType::from_data(types),
Expand Down Expand Up @@ -113,4 +148,66 @@ impl IndexesTable {

AsyncOneBlockSystemTable::create(Self { table_info })
}

async fn list_inverted_index_tables(
&self,
ctx: Arc<dyn TableContext>,
) -> Result<Vec<TableInfo>> {
let tenant = ctx.get_tenant();
let visibility_checker = ctx.get_visibility_checker().await?;
let catalog = ctx.get_catalog(CATALOG_DEFAULT).await?;

let ctl_name = catalog.name();
let dbs = match catalog.list_databases(&tenant).await {
Ok(dbs) => dbs
.into_iter()
.filter(|db| {
visibility_checker.check_database_visibility(
&ctl_name,
db.name(),
db.get_db_info().ident.db_id,
)
})
.collect::<Vec<_>>(),
Err(err) => {
let msg = format!("List databases failed on catalog {}: {}", ctl_name, err);
warn!("{}", msg);
ctx.push_warning(msg);

vec![]
}
};

let mut index_tables = Vec::new();
for db in dbs {
let db_id = db.get_db_info().ident.db_id;
let db_name = db.name();

let tables = match catalog.list_tables(&tenant, db_name).await {
Ok(tables) => tables,
Err(err) => {
let msg = format!("Failed to list tables in database: {}, {}", db_name, err);
warn!("{}", msg);
ctx.push_warning(msg);
continue;
}
};
for table in tables {
let table_info = table.get_table_info();
if table_info.meta.indexes.is_empty() {
continue;
}
if visibility_checker.check_table_visibility(
b41sh marked this conversation as resolved.
Show resolved Hide resolved
&ctl_name,
db_name,
table.name(),
db_id,
table.get_id(),
) {
index_tables.push(table_info.clone());
}
}
}
Ok(index_tables)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ CREATE TABLE books(
)

statement ok
CREATE INVERTED INDEX IF NOT EXISTS idx1 ON books(title, author, description) tokenizer = 'chinese' filters = 'english_stop,english_stemmer,chinese_stop'
CREATE INVERTED INDEX IF NOT EXISTS idx2 ON books(title, author, description) tokenizer = 'chinese' filters = 'english_stop,english_stemmer,chinese_stop'

statement ok
INSERT INTO books VALUES
Expand Down Expand Up @@ -342,10 +342,10 @@ SELECT id, score(), title FROM books WHERE query('title:(设计 实现)^5 descri

# index without optional filters and index rocord is basic
statement ok
CREATE OR REPLACE INVERTED INDEX idx1 ON books(title, author, description) tokenizer = 'chinese' index_record='basic'
CREATE OR REPLACE INVERTED INDEX idx2 ON books(title, author, description) tokenizer = 'chinese' index_record='basic'

statement ok
REFRESH INVERTED INDEX idx1 ON books
REFRESH INVERTED INDEX idx2 ON books

query IFT
SELECT id, score(), title FROM books WHERE match('title^5, description^1.2', 'python') ORDER BY score() DESC
Expand Down Expand Up @@ -413,6 +413,22 @@ SELECT id, score(), body FROM t1 WHERE query('body.metadata.tags:技术')
6 2.4057739 {"metadata":{"author":"张三","publishedDate":"2023-10-23","tags":["人工智能","机器学习","技术"]},"title":"人工智能与机器学习"}
10 2.4057739 {"metadata":{"author":"刘七","publishedDate":"2023-06-25","tags":["网络安全","隐私保护","信息技术"]},"title":"网络安全与隐私保护"}

statement error 1118
ALTER TABLE t1 DROP COLUMN body

statement error 1118
ALTER TABLE books MODIFY COLUMN title int;

statement ok
ALTER TABLE books MODIFY COLUMN title string not null

query TTT
SELECT name, type, definition FROM system.indexes order by name
----
idx INVERTED t1(body)tokenizer='chinese'
idx1 INVERTED t(content)index_record='"basic"' tokenizer='chinese'
idx2 INVERTED books(title, author, description)index_record='"basic"' tokenizer='chinese'

statement ok
use default

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
=== test u1 with role1 ===
idx1 INVERTED t1(title)
=== test u2 with role2 ===
29 changes: 29 additions & 0 deletions tests/suites/5_ee/06_inverted_index/06_0001_index_visibility.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../../../shell_env.sh

echo "drop role if exists role1" | $BENDSQL_CLIENT_CONNECT
echo "drop role if exists role2" | $BENDSQL_CLIENT_CONNECT
echo "drop user if exists u1" | $BENDSQL_CLIENT_CONNECT
echo "drop user if exists u2" | $BENDSQL_CLIENT_CONNECT
echo "drop database if exists db1" | $BENDSQL_CLIENT_CONNECT
echo "create database db1" | $BENDSQL_CLIENT_CONNECT
echo "create table db1.t1(id int, title string, inverted index idx1(title))" | $BENDSQL_CLIENT_CONNECT
echo "insert into db1.t1 values(1, 'hello world')" | $BENDSQL_CLIENT_CONNECT
echo "create role role1;" | $BENDSQL_CLIENT_CONNECT
echo "create role role2;" | $BENDSQL_CLIENT_CONNECT
echo "grant select on db1.t1 to role role1;" | $BENDSQL_CLIENT_CONNECT
echo "create user u1 identified by '123' with DEFAULT_ROLE='role1';" | $BENDSQL_CLIENT_CONNECT
echo "create user u2 identified by '123' with DEFAULT_ROLE='role2';" | $BENDSQL_CLIENT_CONNECT
echo "grant role role1 to u1;" | $BENDSQL_CLIENT_CONNECT
echo "grant role role2 to u2;" | $BENDSQL_CLIENT_CONNECT

echo "=== test u1 with role1 ==="
export TEST_U1_CONNECT="bendsql --user=u1 --password=123 --host=${QUERY_MYSQL_HANDLER_HOST} --port ${QUERY_HTTP_HANDLER_PORT}"
echo "select name, type, definition from system.indexes order by name" | $TEST_U1_CONNECT

echo "=== test u2 with role2 ==="
export TEST_U2_CONNECT="bendsql --user=u2 --password=123 --host=${QUERY_MYSQL_HANDLER_HOST} --port ${QUERY_HTTP_HANDLER_PORT}"
echo "select name, type, definition from system.indexes order by name" | $TEST_U2_CONNECT

Loading