diff --git a/src/common/exception/src/exception_code.rs b/src/common/exception/src/exception_code.rs index 4ef3d3152d16b..57e2a660315e1 100644 --- a/src/common/exception/src/exception_code.rs +++ b/src/common/exception/src/exception_code.rs @@ -145,6 +145,7 @@ build_exceptions! { VirtualColumnNotFound(1115), VirtualColumnAlreadyExists(1116), ColumnReferencedByComputedColumn(1117), + ColumnReferencedByInvertedIndex(1118), // The table is not a clustered table. UnclusteredTable(1118), UnknownCatalog(1119), diff --git a/src/query/service/src/interpreters/access/privilege_access.rs b/src/query/service/src/interpreters/access/privilege_access.rs index 5003ae5e27e28..1b82a3a0bac1e 100644 --- a/src/query/service/src/interpreters/access/privilege_access.rs +++ b/src/query/service/src/interpreters/access/privilege_access.rs @@ -53,7 +53,7 @@ enum ObjectId { // some statements like `SELECT 1`, `SHOW USERS`, `SHOW ROLES`, `SHOW TABLES` will be // rewritten to the queries on the system tables, we need to skip the privilege check on // these tables. 
-const SYSTEM_TABLES_ALLOW_LIST: [&str; 18] = [ +const SYSTEM_TABLES_ALLOW_LIST: [&str; 19] = [ "catalogs", "columns", "databases", @@ -72,6 +72,7 @@ const SYSTEM_TABLES_ALLOW_LIST: [&str; 18] = [ "processes", "user_functions", "functions", + "indexes", ]; impl PrivilegeAccess { diff --git a/src/query/service/src/interpreters/interpreter_table_drop_column.rs b/src/query/service/src/interpreters/interpreter_table_drop_column.rs index eec91354c3e61..02e62ec3628c2 100644 --- a/src/query/service/src/interpreters/interpreter_table_drop_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_drop_column.rs @@ -86,9 +86,10 @@ impl Interpreter for DropTableColumnInterpreter { ))); } - let mut schema: DataSchema = table_info.schema().into(); - let field = schema.field_with_name(self.plan.column.as_str())?; + let table_schema = table_info.schema(); + let field = table_schema.field_with_name(self.plan.column.as_str())?; if field.computed_expr().is_none() { + let mut schema: DataSchema = table_info.schema().into(); schema.drop_column(self.plan.column.as_str())?; // Check if this column is referenced by computed columns. check_referenced_computed_columns( @@ -97,6 +98,17 @@ impl Interpreter for DropTableColumnInterpreter { self.plan.column.as_str(), )?; } + // If the column is inverted index column, the column can't be dropped. 
+ if !table_info.meta.indexes.is_empty() { + for (index_name, index) in &table_info.meta.indexes { + if index.column_ids.contains(&field.column_id) { + return Err(ErrorCode::ColumnReferencedByInvertedIndex(format!( + "column `{}` is referenced by inverted index, drop inverted index `{}` first", + field.name, index_name, + ))); + } + } + } let catalog = self.ctx.get_catalog(catalog_name).await?; let mut new_table_meta = table.get_table_info().meta.clone(); diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index 5c39ccc9db2c0..bdaa111ba2a00 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -183,7 +183,7 @@ impl ModifyTableColumnInterpreter { for (field, comment) in field_and_comments { let column = &field.name.to_string(); let data_type = &field.data_type; - if let Some((i, _)) = schema.column_with_name(column) { + if let Some((i, old_field)) = schema.column_with_name(column) { if data_type != &new_schema.fields[i].data_type { // Check if this column is referenced by computed columns. let mut data_schema: DataSchema = table_info.schema().into(); @@ -204,6 +204,20 @@ impl ModifyTableColumnInterpreter { data_type ))); } + // If the column is inverted index column, the type can't be changed. 
+ if !table_info.meta.indexes.is_empty() { + for (index_name, index) in &table_info.meta.indexes { + if index.column_ids.contains(&old_field.column_id) + && old_field.data_type.remove_nullable() + != field.data_type.remove_nullable() + { + return Err(ErrorCode::ColumnReferencedByInvertedIndex(format!( + "column `{}` is referenced by inverted index, drop inverted index `{}` first", + column, index_name, + ))); + } + } + } new_schema.fields[i].data_type = data_type.clone(); } if table_info.meta.field_comments[i] != *comment { diff --git a/src/query/service/tests/it/servers/http/http_query_handlers.rs b/src/query/service/tests/it/servers/http/http_query_handlers.rs index 7ffd1ba305a47..b84ed17dcb90e 100644 --- a/src/query/service/tests/it/servers/http/http_query_handlers.rs +++ b/src/query/service/tests/it/servers/http/http_query_handlers.rs @@ -344,7 +344,7 @@ async fn test_show_databases() -> Result<()> { let _fixture = TestFixture::setup().await?; let sql = "show databases"; - let (status, result) = post_sql(sql, 1).await?; + let (status, result) = post_sql(sql, 3).await?; assert_eq!(status, StatusCode::OK, "{:?}", result); assert!(result.error.is_none(), "{:?}", result); // has only one field: name @@ -564,7 +564,7 @@ async fn test_pagination() -> Result<()> { let ep = create_endpoint().await?; let sql = "select * from numbers(10)"; - let json = serde_json::json!({"sql": sql.to_string(), "pagination": {"wait_time_secs": 1, "max_rows_per_page": 2}, "session": { "settings": {}}}); + let json = serde_json::json!({"sql": sql.to_string(), "pagination": {"wait_time_secs": 3, "max_rows_per_page": 2}, "session": { "settings": {}}}); let (status, result) = post_json_to_endpoint(&ep, &json, HeaderMap::default()).await?; assert_eq!(status, StatusCode::OK, "{:?}", result); @@ -693,7 +693,7 @@ async fn test_system_tables() -> Result<()> { let sql = "select name from system.tables where database='system' order by name"; - let (status, result) = post_sql_to_endpoint(&ep, sql, 
1).await?; + let (status, result) = post_sql_to_endpoint(&ep, sql, 3).await?; assert_eq!(status, StatusCode::OK, "{:?}", result); assert!(!result.data.is_empty(), "{:?}", result); diff --git a/src/query/storages/system/src/indexes_table.rs b/src/query/storages/system/src/indexes_table.rs index 0c7ac5af77d17..61b5cea9671cd 100644 --- a/src/query/storages/system/src/indexes_table.rs +++ b/src/query/storages/system/src/indexes_table.rs @@ -30,6 +30,7 @@ use databend_common_meta_app::schema::TableIdent; use databend_common_meta_app::schema::TableInfo; use databend_common_meta_app::schema::TableMeta; use databend_common_storages_fuse::TableContext; +use log::warn; use crate::table::AsyncOneBlockSystemTable; use crate::table::AsyncSystemTable; @@ -57,12 +58,15 @@ impl AsyncSystemTable for IndexesTable { .list_indexes(ListIndexesReq::new(&tenant, None)) .await?; - let mut names = Vec::with_capacity(indexes.len()); - let mut types = Vec::with_capacity(indexes.len()); - let mut originals = Vec::with_capacity(indexes.len()); - let mut defs = Vec::with_capacity(indexes.len()); - let mut created_on = Vec::with_capacity(indexes.len()); - let mut updated_on = Vec::with_capacity(indexes.len()); + let inverted_index_tables = self.list_inverted_index_tables(ctx.clone()).await?; + + let len = indexes.len() + inverted_index_tables.len(); + let mut names = Vec::with_capacity(len); + let mut types = Vec::with_capacity(len); + let mut originals = Vec::with_capacity(len); + let mut defs = Vec::with_capacity(len); + let mut created_on = Vec::with_capacity(len); + let mut updated_on = Vec::with_capacity(len); for (_, name, index) in indexes { names.push(name.clone()); @@ -73,6 +77,37 @@ impl AsyncSystemTable for IndexesTable { updated_on.push(index.updated_on.map(|u| u.timestamp_micros())); } + for table in inverted_index_tables { + for (name, index) in &table.meta.indexes { + names.push(name.clone()); + types.push("INVERTED".to_string()); + originals.push("".to_string()); + + let schema = 
table.schema(); + let columns = index + .column_ids + .iter() + .map(|id| { + let field = schema.field_of_column_id(*id).unwrap(); + field.name.clone() + }) + .collect::<Vec<_>>() + .join(", "); + + let options = index + .options + .iter() + .map(|(key, val)| format!("{}='{}'", key, val)) + .collect::<Vec<_>>() + .join(" "); + + let def = format!("{}({}){}", table.name, columns, options); + defs.push(def); + created_on.push(table.meta.created_on.timestamp_micros()); + updated_on.push(None); + } + } + Ok(DataBlock::new_from_columns(vec![ StringType::from_data(names), StringType::from_data(types), @@ -113,4 +148,66 @@ impl IndexesTable { AsyncOneBlockSystemTable::create(Self { table_info }) } + + async fn list_inverted_index_tables( + &self, + ctx: Arc<dyn TableContext>, + ) -> Result<Vec<TableInfo>> { + let tenant = ctx.get_tenant(); + let visibility_checker = ctx.get_visibility_checker().await?; + let catalog = ctx.get_catalog(CATALOG_DEFAULT).await?; + + let ctl_name = catalog.name(); + let dbs = match catalog.list_databases(&tenant).await { + Ok(dbs) => dbs + .into_iter() + .filter(|db| { + visibility_checker.check_database_visibility( + &ctl_name, + db.name(), + db.get_db_info().ident.db_id, + ) + }) + .collect::<Vec<_>>(), + Err(err) => { + let msg = format!("List databases failed on catalog {}: {}", ctl_name, err); + warn!("{}", msg); + ctx.push_warning(msg); + + vec![] + } + }; + + let mut index_tables = Vec::new(); + for db in dbs { + let db_id = db.get_db_info().ident.db_id; + let db_name = db.name(); + + let tables = match catalog.list_tables(&tenant, db_name).await { + Ok(tables) => tables, + Err(err) => { + let msg = format!("Failed to list tables in database: {}, {}", db_name, err); + warn!("{}", msg); + ctx.push_warning(msg); + continue; + } + }; + for table in tables { + let table_info = table.get_table_info(); + if table_info.meta.indexes.is_empty() { + continue; + } + if visibility_checker.check_table_visibility( + &ctl_name, + db_name, + table.name(), + db_id, + table.get_id(), + ) { + 
index_tables.push(table_info.clone()); + } + } + } + Ok(index_tables) + } } diff --git a/tests/sqllogictests/suites/ee/04_ee_inverted_index/04_0000_inverted_index_base.test b/tests/sqllogictests/suites/ee/04_ee_inverted_index/04_0000_inverted_index_base.test index 213b061f9e730..05a3b1e10a33d 100644 --- a/tests/sqllogictests/suites/ee/04_ee_inverted_index/04_0000_inverted_index_base.test +++ b/tests/sqllogictests/suites/ee/04_ee_inverted_index/04_0000_inverted_index_base.test @@ -191,7 +191,7 @@ CREATE TABLE books( ) statement ok -CREATE INVERTED INDEX IF NOT EXISTS idx1 ON books(title, author, description) tokenizer = 'chinese' filters = 'english_stop,english_stemmer,chinese_stop' +CREATE INVERTED INDEX IF NOT EXISTS idx2 ON books(title, author, description) tokenizer = 'chinese' filters = 'english_stop,english_stemmer,chinese_stop' statement ok INSERT INTO books VALUES @@ -342,10 +342,10 @@ SELECT id, score(), title FROM books WHERE query('title:(设计 实现)^5 descri # index without optional filters and index rocord is basic statement ok -CREATE OR REPLACE INVERTED INDEX idx1 ON books(title, author, description) tokenizer = 'chinese' index_record='basic' +CREATE OR REPLACE INVERTED INDEX idx2 ON books(title, author, description) tokenizer = 'chinese' index_record='basic' statement ok -REFRESH INVERTED INDEX idx1 ON books +REFRESH INVERTED INDEX idx2 ON books query IFT SELECT id, score(), title FROM books WHERE match('title^5, description^1.2', 'python') ORDER BY score() DESC @@ -413,6 +413,22 @@ SELECT id, score(), body FROM t1 WHERE query('body.metadata.tags:技术') 6 2.4057739 {"metadata":{"author":"张三","publishedDate":"2023-10-23","tags":["人工智能","机器学习","技术"]},"title":"人工智能与机器学习"} 10 2.4057739 {"metadata":{"author":"刘七","publishedDate":"2023-06-25","tags":["网络安全","隐私保护","信息技术"]},"title":"网络安全与隐私保护"} +statement error 1118 +ALTER TABLE t1 DROP COLUMN body + +statement error 1118 +ALTER TABLE books MODIFY COLUMN title int; + +statement ok +ALTER TABLE books MODIFY COLUMN 
title string not null + +query TTT +SELECT name, type, definition FROM system.indexes order by name +---- +idx INVERTED t1(body)tokenizer='chinese' +idx1 INVERTED t(content)index_record='"basic"' tokenizer='chinese' +idx2 INVERTED books(title, author, description)index_record='"basic"' tokenizer='chinese' + statement ok use default diff --git a/tests/suites/5_ee/06_inverted_index/06_0001_index_visibility.result b/tests/suites/5_ee/06_inverted_index/06_0001_index_visibility.result new file mode 100644 index 0000000000000..7a26d3273b7b3 --- /dev/null +++ b/tests/suites/5_ee/06_inverted_index/06_0001_index_visibility.result @@ -0,0 +1,3 @@ +=== test u1 with role1 === +idx1 INVERTED t1(title) +=== test u2 with role2 === diff --git a/tests/suites/5_ee/06_inverted_index/06_0001_index_visibility.sh b/tests/suites/5_ee/06_inverted_index/06_0001_index_visibility.sh new file mode 100755 index 0000000000000..f0010bbe4c8c2 --- /dev/null +++ b/tests/suites/5_ee/06_inverted_index/06_0001_index_visibility.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../../../shell_env.sh + +echo "drop role if exists role1" | $BENDSQL_CLIENT_CONNECT +echo "drop role if exists role2" | $BENDSQL_CLIENT_CONNECT +echo "drop user if exists u1" | $BENDSQL_CLIENT_CONNECT +echo "drop user if exists u2" | $BENDSQL_CLIENT_CONNECT +echo "drop database if exists db1" | $BENDSQL_CLIENT_CONNECT +echo "create database db1" | $BENDSQL_CLIENT_CONNECT +echo "create table db1.t1(id int, title string, inverted index idx1(title))" | $BENDSQL_CLIENT_CONNECT +echo "insert into db1.t1 values(1, 'hello world')" | $BENDSQL_CLIENT_CONNECT +echo "create role role1;" | $BENDSQL_CLIENT_CONNECT +echo "create role role2;" | $BENDSQL_CLIENT_CONNECT +echo "grant select on db1.t1 to role role1;" | $BENDSQL_CLIENT_CONNECT +echo "create user u1 identified by '123' with DEFAULT_ROLE='role1';" | $BENDSQL_CLIENT_CONNECT +echo "create user u2 identified by '123' with DEFAULT_ROLE='role2';" | $BENDSQL_CLIENT_CONNECT +echo "grant role role1 to u1;" | $BENDSQL_CLIENT_CONNECT +echo "grant role role2 to u2;" | $BENDSQL_CLIENT_CONNECT + +echo "=== test u1 with role1 ===" +export TEST_U1_CONNECT="bendsql --user=u1 --password=123 --host=${QUERY_MYSQL_HANDLER_HOST} --port ${QUERY_HTTP_HANDLER_PORT}" +echo "select name, type, definition from system.indexes order by name" | $TEST_U1_CONNECT + +echo "=== test u2 with role2 ===" +export TEST_U2_CONNECT="bendsql --user=u2 --password=123 --host=${QUERY_MYSQL_HANDLER_HOST} --port ${QUERY_HTTP_HANDLER_PORT}" +echo "select name, type, definition from system.indexes order by name" | $TEST_U2_CONNECT +