From 84aa5b7b22ffc9fd4ae6527df46710631df0d93e Mon Sep 17 00:00:00 2001 From: Yohan Wal Date: Tue, 12 Nov 2024 11:04:04 +0800 Subject: [PATCH] feat: alter fulltext options (#4952) * feat(WIP): alter fulltext index Co-Authored-By: irenjj * feat: alter column fulltext option Co-Authored-By: irenjj * chore: fmt * test: add unit and integration tests Co-Authored-By: irenjj * test: update sqlness test * chore: new line * chore: lock file update * chore: apply review comments * test: update sqlness test * test: update sqlness test * fix: convert * chore: apply review comments * fix: toml fmt * fix: tests * test: add test for mito * chore: error message * fix: test * fix: test * fix: wrong comment * chore: change proto rev * chore: apply review comments * chore: apply review comments * chore: fmt --------- Co-authored-by: irenjj --- Cargo.lock | 5 +- Cargo.toml | 2 +- src/api/src/v1/column_def.rs | 13 +- src/common/grpc-expr/src/alter.rs | 23 ++- src/common/grpc-expr/src/error.rs | 11 +- .../src/ddl/alter_table/region_request.rs | 3 + .../src/ddl/alter_table/update_metadata.rs | 3 +- src/datatypes/Cargo.toml | 2 + src/datatypes/src/error.rs | 10 +- src/datatypes/src/schema.rs | 6 +- src/datatypes/src/schema/column_schema.rs | 84 +++++++- src/mito2/src/engine/alter_test.rs | 168 ++++++++++++++-- src/operator/src/expr_factory.rs | 21 +- src/query/src/sql/show_create_table.rs | 6 +- src/sql/src/error.rs | 10 +- src/sql/src/lib.rs | 4 +- src/sql/src/parsers/alter_parser.rs | 107 ++++++++++- src/sql/src/parsers/create_parser.rs | 14 +- src/sql/src/parsers/utils.rs | 9 + src/sql/src/statements.rs | 5 +- src/sql/src/statements/alter.rs | 34 ++++ src/sql/src/statements/create.rs | 39 +--- src/store-api/Cargo.toml | 1 + src/store-api/src/metadata.rs | 97 +++++++++- src/store-api/src/region_request.rs | 94 +++++++-- src/table/src/error.rs | 21 +- src/table/src/metadata.rs | 136 ++++++++++++- src/table/src/requests.rs | 6 +- .../alter/change_col_fulltext_options.result | 179 ++++++++++++++++++ .../alter/change_col_fulltext_options.sql | 52 +++++ 30 files changed, 1043 insertions(+), 122 deletions(-) create mode 100644 tests/cases/standalone/common/alter/change_col_fulltext_options.result create mode 100644 tests/cases/standalone/common/alter/change_col_fulltext_options.sql diff --git a/Cargo.lock b/Cargo.lock index b6426b59b39c..7e320880d206 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3328,6 +3328,8 @@ dependencies = [ "serde", "serde_json", "snafu 0.8.5", + "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", + "sqlparser_derive 0.1.1", ] [[package]] @@ -4581,7 +4583,7 @@ dependencies = [ [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=255f87a3318ace3f88a67f76995a0e14910983f4#255f87a3318ace3f88a67f76995a0e14910983f4" +source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=67bb1d52bc1241972c368657e658592b1be7ead3#67bb1d52bc1241972c368657e658592b1be7ead3" dependencies = [ "prost 0.12.6", "serde", @@ -11660,6 +11662,7 @@ dependencies = [ "derive_builder 0.12.0", "futures", "humantime", + "prost 0.12.6", "serde", "serde_json", "snafu 0.8.5", diff --git a/Cargo.toml b/Cargo.toml index a23e77d2576a..6afb04634de6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -121,7 +121,7 @@ etcd-client = { version = "0.13" } fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "255f87a3318ace3f88a67f76995a0e14910983f4" } +greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "67bb1d52bc1241972c368657e658592b1be7ead3" } humantime = "2.1" humantime-serde = "1.1" itertools = "0.10" diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index 3b150e850234..f026d3f6f97f 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -15,9 +15,10 @@ use std::collections::HashMap; use datatypes::schema::{ - ColumnDefaultConstraint, ColumnSchema, FulltextOptions, COMMENT_KEY, FULLTEXT_KEY, - INVERTED_INDEX_KEY, + ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY, + FULLTEXT_KEY, INVERTED_INDEX_KEY, }; +use greptime_proto::v1::Analyzer; use snafu::ResultExt; use crate::error::{self, Result}; @@ -104,6 +105,14 @@ pub fn options_from_fulltext(fulltext: &FulltextOptions) -> Result FulltextAnalyzer { + match analyzer { + Analyzer::English => FulltextAnalyzer::English, + Analyzer::Chinese => FulltextAnalyzer::Chinese, + } +} + #[cfg(test)] mod tests { diff --git a/src/common/grpc-expr/src/alter.rs b/src/common/grpc-expr/src/alter.rs index 5dcc861e9d8a..ea95a62e9933 100644 --- a/src/common/grpc-expr/src/alter.rs +++ b/src/common/grpc-expr/src/alter.rs @@ -15,20 +15,22 @@ use api::helper::ColumnDataTypeWrapper; use api::v1::add_column_location::LocationType; use api::v1::alter_expr::Kind; +use api::v1::column_def::as_fulltext_option; use api::v1::{ - column_def, AddColumnLocation as Location, AlterExpr, ChangeColumnTypes, CreateTableExpr, - DropColumns, RenameTable, SemanticType, + column_def, AddColumnLocation as Location, AlterExpr, Analyzer, ChangeColumnTypes, + CreateTableExpr, DropColumns, RenameTable, SemanticType, }; use common_query::AddColumnLocation; -use datatypes::schema::{ColumnSchema, RawSchema}; +use datatypes::schema::{ColumnSchema, FulltextOptions, RawSchema}; use snafu::{ensure, OptionExt, ResultExt}; use store_api::region_request::ChangeOption; use table::metadata::TableId; use table::requests::{AddColumnRequest, AlterKind, AlterTableRequest, ChangeColumnTypeRequest}; use crate::error::{ - InvalidChangeTableOptionRequestSnafu, InvalidColumnDefSnafu, MissingFieldSnafu, - MissingTimestampColumnSnafu, Result, UnknownLocationTypeSnafu, + InvalidChangeFulltextOptionRequestSnafu, InvalidChangeTableOptionRequestSnafu, + InvalidColumnDefSnafu, MissingFieldSnafu, MissingTimestampColumnSnafu, Result, + UnknownLocationTypeSnafu, }; const LOCATION_TYPE_FIRST: i32 = LocationType::First as i32; @@ -102,6 +104,17 @@ pub fn alter_expr_to_request(table_id: TableId, expr: AlterExpr) -> Result, _>>() .context(InvalidChangeTableOptionRequestSnafu)?, }, + Kind::ChangeColumnFulltext(c) => AlterKind::ChangeColumnFulltext { + column_name: c.column_name, + options: FulltextOptions { + enable: c.enable, + analyzer: as_fulltext_option( + Analyzer::try_from(c.analyzer) + .context(InvalidChangeFulltextOptionRequestSnafu)?, + ), + case_sensitive: c.case_sensitive, + }, + }, }; let request = AlterTableRequest { diff --git a/src/common/grpc-expr/src/error.rs b/src/common/grpc-expr/src/error.rs index 5d59c469831a..c9fb1f949e6a 100644 --- a/src/common/grpc-expr/src/error.rs +++ b/src/common/grpc-expr/src/error.rs @@ -125,6 +125,14 @@ pub enum Error { #[snafu(source)] error: MetadataError, }, + + #[snafu(display("Invalid change fulltext option request"))] + InvalidChangeFulltextOptionRequest { + #[snafu(implicit)] + location: Location, + #[snafu(source)] + error: prost::DecodeError, + }, } pub type Result = std::result::Result; @@ -148,7 +156,8 @@ impl ErrorExt for Error { Error::UnknownColumnDataType { .. } | Error::InvalidFulltextColumnType { .. } => { StatusCode::InvalidArguments } - Error::InvalidChangeTableOptionRequest { .. } => StatusCode::InvalidArguments, + Error::InvalidChangeTableOptionRequest { .. } + | Error::InvalidChangeFulltextOptionRequest { .. } => StatusCode::InvalidArguments, } } diff --git a/src/common/meta/src/ddl/alter_table/region_request.rs b/src/common/meta/src/ddl/alter_table/region_request.rs index 41bd1481c9bc..06c93c786f7b 100644 --- a/src/common/meta/src/ddl/alter_table/region_request.rs +++ b/src/common/meta/src/ddl/alter_table/region_request.rs @@ -107,6 +107,9 @@ fn create_proto_alter_kind( } Kind::RenameTable(_) => Ok(None), Kind::ChangeTableOptions(v) => Ok(Some(alter_request::Kind::ChangeTableOptions(v.clone()))), + Kind::ChangeColumnFulltext(v) => { + Ok(Some(alter_request::Kind::ChangeColumnFulltext(v.clone()))) + } } } diff --git a/src/common/meta/src/ddl/alter_table/update_metadata.rs b/src/common/meta/src/ddl/alter_table/update_metadata.rs index 6d27dabe2718..3b5c5a0215d5 100644 --- a/src/common/meta/src/ddl/alter_table/update_metadata.rs +++ b/src/common/meta/src/ddl/alter_table/update_metadata.rs @@ -53,7 +53,8 @@ impl AlterTableProcedure { } AlterKind::DropColumns { .. } | AlterKind::ChangeColumnTypes { .. } - | AlterKind::ChangeTableOptions { .. } => {} + | AlterKind::ChangeTableOptions { .. } + | AlterKind::ChangeColumnFulltext { .. } => {} } Ok(new_info) diff --git a/src/datatypes/Cargo.toml b/src/datatypes/Cargo.toml index 23eac53a030c..63f23816f260 100644 --- a/src/datatypes/Cargo.toml +++ b/src/datatypes/Cargo.toml @@ -33,3 +33,5 @@ paste = "1.0" serde.workspace = true serde_json.workspace = true snafu.workspace = true +sqlparser.workspace = true +sqlparser_derive = "0.1" diff --git a/src/datatypes/src/error.rs b/src/datatypes/src/error.rs index aca9b883a952..44e3def66573 100644 --- a/src/datatypes/src/error.rs +++ b/src/datatypes/src/error.rs @@ -212,6 +212,13 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Invalid fulltext option: {}", msg))] + InvalidFulltextOption { + msg: String, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -230,7 +237,8 @@ impl ErrorExt for Error { | DuplicateMeta { .. } | InvalidTimestampPrecision { .. } | InvalidPrecisionOrScale { .. } - | InvalidJson { .. } => StatusCode::InvalidArguments, + | InvalidJson { .. } + | InvalidFulltextOption { .. } => StatusCode::InvalidArguments, ValueExceedsPrecision { .. } | CastType { .. } diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 24f361f0904a..9aecfdf424c5 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -27,8 +27,10 @@ use snafu::{ensure, ResultExt}; use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result}; use crate::prelude::DataType; pub use crate::schema::column_schema::{ - ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, COMMENT_KEY, FULLTEXT_KEY, - INVERTED_INDEX_KEY, TIME_INDEX_KEY, + ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, + COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, + TIME_INDEX_KEY, }; pub use crate::schema::constraint::ColumnDefaultConstraint; pub use crate::schema::raw::RawSchema; diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index 94200ae8335b..17a6d086c500 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -18,9 +18,10 @@ use std::fmt; use arrow::datatypes::Field; use serde::{Deserialize, Serialize}; use snafu::{ensure, ResultExt}; +use sqlparser_derive::{Visit, VisitMut}; use crate::data_type::{ConcreteDataType, DataType}; -use crate::error::{self, Error, Result}; +use crate::error::{self, Error, InvalidFulltextOptionSnafu, Result}; use crate::schema::constraint::ColumnDefaultConstraint; use crate::schema::TYPE_KEY; use crate::types::JSON_TYPE_NAME; @@ -38,6 +39,12 @@ const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint"; pub const FULLTEXT_KEY: &str = "greptime:fulltext"; /// Key used to store whether the column has inverted index in arrow field's metadata. pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index"; + +/// Keys used in fulltext options +pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable"; +pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer"; +pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive"; + /// Schema of a column, used as an immutable struct. #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ColumnSchema { @@ -283,6 +290,14 @@ impl ColumnSchema { ); Ok(self) } + + pub fn set_fulltext_options(&mut self, options: &FulltextOptions) -> Result<()> { + self.metadata.insert( + FULLTEXT_KEY.to_string(), + serde_json::to_string(options).context(error::SerializeSnafu)?, + ); + Ok(()) + } } impl TryFrom<&Field> for ColumnSchema { @@ -347,7 +362,7 @@ impl TryFrom<&ColumnSchema> for Field { } /// Fulltext options for a column. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)] #[serde(rename_all = "kebab-case")] pub struct FulltextOptions { /// Whether the fulltext index is enabled. @@ -360,8 +375,71 @@ pub struct FulltextOptions { pub case_sensitive: bool, } +impl fmt::Display for FulltextOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "enable={}", self.enable)?; + if self.enable { + write!(f, ", analyzer={}", self.analyzer)?; + write!(f, ", case_sensitive={}", self.case_sensitive)?; + } + Ok(()) + } +} + +impl TryFrom> for FulltextOptions { + type Error = Error; + + fn try_from(options: HashMap) -> Result { + let mut fulltext_options = FulltextOptions { + enable: true, + ..Default::default() + }; + + if let Some(enable) = options.get(COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE) { + match enable.to_ascii_lowercase().as_str() { + "true" => fulltext_options.enable = true, + "false" => fulltext_options.enable = false, + _ => { + return InvalidFulltextOptionSnafu { + msg: format!("{enable}, expected: 'true' | 'false'"), + } + .fail(); + } + } + }; + + if let Some(analyzer) = options.get(COLUMN_FULLTEXT_OPT_KEY_ANALYZER) { + match analyzer.to_ascii_lowercase().as_str() { + "english" => fulltext_options.analyzer = FulltextAnalyzer::English, + "chinese" => fulltext_options.analyzer = FulltextAnalyzer::Chinese, + _ => { + return InvalidFulltextOptionSnafu { + msg: format!("{analyzer}, expected: 'English' | 'Chinese'"), + } + .fail(); + } + } + }; + + if let Some(case_sensitive) = options.get(COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE) { + match case_sensitive.to_ascii_lowercase().as_str() { + "true" => fulltext_options.case_sensitive = true, + "false" => fulltext_options.case_sensitive = false, + _ => { + return InvalidFulltextOptionSnafu { + msg: format!("{case_sensitive}, expected: 'true' | 'false'"), + } + .fail(); + } + } + } + + Ok(fulltext_options) + } +} + /// Fulltext analyzer. -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)] pub enum FulltextAnalyzer { #[default] English, diff --git a/src/mito2/src/engine/alter_test.rs b/src/mito2/src/engine/alter_test.rs index 8019ef647a86..961eaf05bbdf 100644 --- a/src/mito2/src/engine/alter_test.rs +++ b/src/mito2/src/engine/alter_test.rs @@ -22,7 +22,7 @@ use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_recordbatch::RecordBatches; use datatypes::prelude::ConcreteDataType; -use datatypes::schema::ColumnSchema; +use datatypes::schema::{ColumnSchema, FulltextAnalyzer, FulltextOptions}; use store_api::metadata::ColumnMetadata; use store_api::region_engine::{RegionEngine, RegionRole}; use store_api::region_request::{ @@ -68,6 +68,36 @@ fn add_tag1() -> RegionAlterRequest { } } +fn alter_column_fulltext_options() -> RegionAlterRequest { + RegionAlterRequest { + schema_version: 0, + kind: AlterKind::ChangeColumnFulltext { + column_name: "tag_0".to_string(), + options: FulltextOptions { + enable: true, + analyzer: FulltextAnalyzer::English, + case_sensitive: false, + }, + }, + } +} + +fn check_region_version( + engine: &MitoEngine, + region_id: RegionId, + last_entry_id: u64, + committed_sequence: u64, + flushed_entry_id: u64, + flushed_sequence: u64, +) { + let region = engine.get_region(region_id).unwrap(); + let version_data = region.version_control.current(); + assert_eq!(last_entry_id, version_data.last_entry_id); + assert_eq!(committed_sequence, version_data.committed_sequence); + assert_eq!(flushed_entry_id, version_data.version.flushed_entry_id); + assert_eq!(flushed_sequence, version_data.version.flushed_sequence); +} + #[tokio::test] async fn test_alter_region() { common_telemetry::init_default_ut_logging(); @@ -116,15 +146,7 @@ async fn test_alter_region() { | | 2 | 2.0 | 1970-01-01T00:00:02 | +-------+-------+---------+---------------------+"; scan_check_after_alter(&engine, region_id, expected).await; - let check_region = |engine: &MitoEngine| { - let region = engine.get_region(region_id).unwrap(); - let version_data = region.version_control.current(); - assert_eq!(1, version_data.last_entry_id); - assert_eq!(3, version_data.committed_sequence); - assert_eq!(1, version_data.version.flushed_entry_id); - assert_eq!(3, version_data.version.flushed_sequence); - }; - check_region(&engine); + check_region_version(&engine, region_id, 1, 3, 1, 3); // Reopen region. let engine = env.reopen_engine(engine, MitoConfig::default()).await; @@ -141,7 +163,7 @@ async fn test_alter_region() { .await .unwrap(); scan_check_after_alter(&engine, region_id, expected).await; - check_region(&engine); + check_region_version(&engine, region_id, 1, 3, 1, 3); } /// Build rows with schema (string, f64, ts_millis, string). @@ -328,12 +350,7 @@ async fn test_alter_region_retry() { | | a | 1.0 | 1970-01-01T00:00:01 | +-------+-------+---------+---------------------+"; scan_check_after_alter(&engine, region_id, expected).await; - let region = engine.get_region(region_id).unwrap(); - let version_data = region.version_control.current(); - assert_eq!(1, version_data.last_entry_id); - assert_eq!(2, version_data.committed_sequence); - assert_eq!(1, version_data.version.flushed_entry_id); - assert_eq!(2, version_data.version.flushed_sequence); + check_region_version(&engine, region_id, 1, 2, 1, 2); } #[tokio::test] @@ -438,3 +455,120 @@ async fn test_alter_on_flushing() { +-------+-------+---------+---------------------+"; assert_eq!(expected, batches.pretty_print().unwrap()); } + +#[tokio::test] +async fn test_alter_column_fulltext_options() { + common_telemetry::init_default_ut_logging(); + + let mut env = TestEnv::new(); + let listener = Arc::new(AlterFlushListener::default()); + let engine = env + .create_engine_with(MitoConfig::default(), None, Some(listener.clone())) + .await; + + let region_id = RegionId::new(1, 1); + let request = CreateRequestBuilder::new().build(); + + env.get_schema_metadata_manager() + .register_region_table_info( + region_id.table_id(), + "test_table", + "test_catalog", + "test_schema", + None, + ) + .await; + + let column_schemas = rows_schema(&request); + let region_dir = request.region_dir.clone(); + engine + .handle_request(region_id, RegionRequest::Create(request)) + .await + .unwrap(); + + let rows = Rows { + schema: column_schemas, + rows: build_rows(0, 3), + }; + put_rows(&engine, region_id, rows).await; + + // Spawns a task to flush the engine. + let engine_cloned = engine.clone(); + let flush_job = tokio::spawn(async move { + flush_region(&engine_cloned, region_id, None).await; + }); + // Waits for flush begin. + listener.wait_flush_begin().await; + + // Consumes the notify permit in the listener. + listener.wait_request_begin().await; + + // Submits an alter request to the region. The region should add the request + // to the pending ddl request list. + let request = alter_column_fulltext_options(); + let engine_cloned = engine.clone(); + let alter_job = tokio::spawn(async move { + engine_cloned + .handle_request(region_id, RegionRequest::Alter(request)) + .await + .unwrap(); + }); + // Waits until the worker handles the alter request. + listener.wait_request_begin().await; + + // Spawns two task to flush the engine. The flush scheduler should put them to the + // pending task list. + let engine_cloned = engine.clone(); + let pending_flush_job = tokio::spawn(async move { + flush_region(&engine_cloned, region_id, None).await; + }); + // Waits until the worker handles the flush request. + listener.wait_request_begin().await; + + // Wake up flush. + listener.wake_flush(); + // Wait for the flush job. + flush_job.await.unwrap(); + // Wait for pending flush job. + pending_flush_job.await.unwrap(); + // Wait for the write job. + alter_job.await.unwrap(); + + let expect_fulltext_options = FulltextOptions { + enable: true, + analyzer: FulltextAnalyzer::English, + case_sensitive: false, + }; + let check_fulltext_options = |engine: &MitoEngine, expected: &FulltextOptions| { + let current_fulltext_options = engine + .get_region(region_id) + .unwrap() + .metadata() + .column_by_name("tag_0") + .unwrap() + .column_schema + .fulltext_options() + .unwrap() + .unwrap(); + assert_eq!(*expected, current_fulltext_options); + }; + check_fulltext_options(&engine, &expect_fulltext_options); + check_region_version(&engine, region_id, 1, 3, 1, 3); + + // Reopen region. + let engine = env.reopen_engine(engine, MitoConfig::default()).await; + engine + .handle_request( + region_id, + RegionRequest::Open(RegionOpenRequest { + engine: String::new(), + region_dir, + options: HashMap::default(), + skip_wal_replay: false, + }), + ) + .await + .unwrap(); + check_fulltext_options(&engine, &expect_fulltext_options); + check_region_version(&engine, region_id, 1, 3, 1, 3); +} diff --git a/src/operator/src/expr_factory.rs b/src/operator/src/expr_factory.rs index 19f9648719bb..d4316a142871 100644 --- a/src/operator/src/expr_factory.rs +++ b/src/operator/src/expr_factory.rs @@ -18,15 +18,16 @@ use api::helper::ColumnDataTypeWrapper; use api::v1::alter_expr::Kind; use api::v1::column_def::options_from_column_schema; use api::v1::{ - AddColumn, AddColumns, AlterExpr, ChangeColumnType, ChangeColumnTypes, ChangeTableOptions, - ColumnDataType, ColumnDataTypeExtension, CreateFlowExpr, CreateTableExpr, CreateViewExpr, - DropColumn, DropColumns, ExpireAfter, RenameTable, SemanticType, TableName, + AddColumn, AddColumns, AlterExpr, Analyzer, ChangeColumnFulltext, ChangeColumnType, + ChangeColumnTypes, ChangeTableOptions, ColumnDataType, ColumnDataTypeExtension, CreateFlowExpr, + CreateTableExpr, CreateViewExpr, DropColumn, DropColumns, ExpireAfter, RenameTable, + SemanticType, TableName, }; use common_error::ext::BoxedError; use common_grpc_expr::util::ColumnExpr; use common_time::Timezone; use datafusion::sql::planner::object_name_to_table_reference; -use datatypes::schema::{ColumnSchema, COMMENT_KEY}; +use datatypes::schema::{ColumnSchema, FulltextAnalyzer, COMMENT_KEY}; use file_engine::FileOptions; use query::sql::{ check_file_to_table_schema_compatibility, file_column_schemas_to_table, @@ -530,6 +531,18 @@ pub(crate) fn to_alter_expr( change_table_options: options.into_iter().map(Into::into).collect(), }) } + AlterTableOperation::ChangeColumnFulltext { + column_name, + options, + } => Kind::ChangeColumnFulltext(ChangeColumnFulltext { + column_name: column_name.value, + enable: options.enable, + analyzer: match options.analyzer { + FulltextAnalyzer::English => Analyzer::English.into(), + FulltextAnalyzer::Chinese => Analyzer::Chinese.into(), + }, + case_sensitive: options.case_sensitive, + }), }; Ok(AlterExpr { diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index 29cd94766611..68be43785703 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -17,7 +17,10 @@ use std::collections::HashMap; use common_meta::SchemaOptions; -use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, SchemaRef, COMMENT_KEY}; +use datatypes::schema::{ + ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, +}; use humantime::format_duration; use snafu::ResultExt; use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName}; @@ -25,7 +28,6 @@ use sql::dialect::GreptimeDbDialect; use sql::parser::ParserContext; use sql::statements::create::{Column, ColumnExtensions, CreateTable, TableConstraint}; use sql::statements::{self, OptionMap}; -use sql::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; use store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column}; use table::metadata::{TableInfoRef, TableMeta}; use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY}; diff --git a/src/sql/src/error.rs b/src/sql/src/error.rs index 920a8ad827eb..245210055e4d 100644 --- a/src/sql/src/error.rs +++ b/src/sql/src/error.rs @@ -319,13 +319,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Invalid fulltext option: {}", msg))] - FulltextInvalidOption { - msg: String, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to set fulltext option"))] SetFulltextOption { source: datatypes::error::Error, @@ -366,8 +359,7 @@ impl ErrorExt for Error { | Simplification { .. } | InvalidInterval { .. } | InvalidUnaryOp { .. } - | UnsupportedUnaryOp { .. } - | FulltextInvalidOption { .. } => StatusCode::InvalidArguments, + | UnsupportedUnaryOp { .. } => StatusCode::InvalidArguments, SerializeColumnDefaultConstraint { source, .. } => source.status_code(), ConvertToGrpcDataType { source, .. } => source.status_code(), diff --git a/src/sql/src/lib.rs b/src/sql/src/lib.rs index 117c8b8f5014..33dfdc215d71 100644 --- a/src/sql/src/lib.rs +++ b/src/sql/src/lib.rs @@ -24,7 +24,5 @@ pub mod parsers; pub mod statements; pub mod util; -pub use parsers::create_parser::{ - COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, ENGINE, MAXVALUE, -}; +pub use parsers::create_parser::{ENGINE, MAXVALUE}; pub use parsers::tql_parser::TQL; diff --git a/src/sql/src/parsers/alter_parser.rs b/src/sql/src/parsers/alter_parser.rs index bcb63881e246..0bd9c56cec79 100644 --- a/src/sql/src/parsers/alter_parser.rs +++ b/src/sql/src/parsers/alter_parser.rs @@ -12,16 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; + use common_query::AddColumnLocation; -use snafu::ResultExt; +use datatypes::schema::COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE; +use snafu::{ensure, ResultExt}; use sqlparser::keywords::Keyword; use sqlparser::parser::{Parser, ParserError}; use sqlparser::tokenizer::Token; -use crate::error::{self, Result}; +use crate::error::{self, InvalidColumnOptionSnafu, Result, SetFulltextOptionSnafu}; use crate::parser::ParserContext; +use crate::parsers::utils::validate_column_fulltext_create_option; use crate::statements::alter::{AlterTable, AlterTableOperation, ChangeTableOption}; use crate::statements::statement::Statement; +use crate::util::parse_option_string; + +fn validate_column_fulltext_alter_option(key: &str) -> bool { + key == COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE || validate_column_fulltext_create_option(key) +} impl ParserContext<'_> { pub(crate) fn parse_alter(&mut self) -> Result { @@ -143,12 +152,41 @@ impl ParserContext<'_> { .parse_identifier(false) .context(error::SyntaxSnafu)?, ); - let target_type = self.parser.parse_data_type().context(error::SyntaxSnafu)?; - Ok(AlterTableOperation::ChangeColumnType { - column_name, - target_type, - }) + if self.parser.parse_keyword(Keyword::SET) { + self.parser + .expect_keyword(Keyword::FULLTEXT) + .context(error::SyntaxSnafu)?; + + let options = self + .parser + .parse_options(Keyword::WITH) + .context(error::SyntaxSnafu)? + .into_iter() + .map(parse_option_string) + .collect::>>()?; + + for key in options.keys() { + ensure!( + validate_column_fulltext_alter_option(key), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: format!("invalid FULLTEXT option: {key}"), + } + ); + } + + Ok(AlterTableOperation::ChangeColumnFulltext { + column_name, + options: options.try_into().context(SetFulltextOptionSnafu)?, + }) + } else { + let target_type = self.parser.parse_data_type().context(error::SyntaxSnafu)?; + Ok(AlterTableOperation::ChangeColumnType { + column_name, + target_type, + }) + } } } @@ -173,6 +211,7 @@ mod tests { use std::assert_matches::assert_matches; use common_error::ext::ErrorExt; + use datatypes::schema::{FulltextAnalyzer, FulltextOptions}; use sqlparser::ast::{ColumnOption, DataType}; use super::*; @@ -515,4 +554,58 @@ mod tests { ) .unwrap_err(); } + + #[test] + fn test_parse_alter_column_fulltext() { + let sql = "ALTER TABLE test_table MODIFY COLUMN a SET FULLTEXT WITH(enable='true',analyzer='English',case_sensitive='false')"; + let mut result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + assert_eq!(1, result.len()); + let statement = result.remove(0); + assert_matches!(statement, Statement::Alter { .. }); + match statement { + Statement::Alter(alter_table) => { + assert_eq!("test_table", alter_table.table_name().0[0].value); + + let alter_operation = alter_table.alter_operation(); + assert_matches!( + alter_operation, + AlterTableOperation::ChangeColumnFulltext { .. } + ); + match alter_operation { + AlterTableOperation::ChangeColumnFulltext { + column_name, + options, + } => { + assert_eq!("a", column_name.value); + assert_eq!( + FulltextOptions { + enable: true, + analyzer: FulltextAnalyzer::English, + case_sensitive: false + }, + *options + ); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } + + let invalid_sql = "ALTER TABLE test_table MODIFY COLUMN a SET FULLTEXT WITH('abcd'='true')"; + let result = ParserContext::create_with_dialect( + invalid_sql, + &GreptimeDbDialect {}, + ParseOptions::default(), + ) + .unwrap_err(); + let err = result.to_string(); + assert_eq!( + err, + "Invalid column option, column name: a, error: invalid FULLTEXT option: abcd" + ); + } } diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index c05a7b9711cd..43b0d5b3591c 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -36,6 +36,7 @@ use crate::error::{ SyntaxSnafu, UnexpectedSnafu, UnsupportedSnafu, }; use crate::parser::{ParserContext, FLOW}; +use crate::parsers::utils::validate_column_fulltext_create_option; use crate::statements::create::{ Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, Partitions, TableConstraint, @@ -59,17 +60,6 @@ fn validate_database_option(key: &str) -> bool { [DB_OPT_KEY_TTL].contains(&key) } -pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer"; -pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive"; - -fn validate_column_fulltext_option(key: &str) -> bool { - [ - COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, - ] - .contains(&key) -} - /// Parses create [table] statement impl<'a> ParserContext<'a> { pub(crate) fn parse_create(&mut self) -> Result { @@ -706,7 +696,7 @@ impl<'a> ParserContext<'a> { for key in options.keys() { ensure!( - validate_column_fulltext_option(key), + validate_column_fulltext_create_option(key), InvalidColumnOptionSnafu { name: column_name.to_string(), msg: format!("invalid FULLTEXT option: {key}"), diff --git a/src/sql/src/parsers/utils.rs b/src/sql/src/parsers/utils.rs index 7332d3e0e8d5..ae5146d7ee7b 100644 --- a/src/sql/src/parsers/utils.rs +++ b/src/sql/src/parsers/utils.rs @@ -26,6 +26,7 @@ use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::TableReference; use datatypes::arrow::datatypes::DataType; +use datatypes::schema::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; use snafu::ResultExt; use crate::error::{ @@ -110,3 +111,11 @@ impl ContextProvider for StubContextProvider { self.state.window_functions().keys().cloned().collect() } } + +pub fn validate_column_fulltext_create_option(key: &str) -> bool { + [ + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, + ] + .contains(&key) +} diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index ea565a3b3e08..f3a60651a177 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -684,7 +684,9 @@ mod tests { use api::v1::ColumnDataType; use common_time::timestamp::TimeUnit; use common_time::timezone::set_default_timezone; - use datatypes::schema::FulltextAnalyzer; + use datatypes::schema::{ + FulltextAnalyzer, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, + }; use datatypes::types::BooleanType; use datatypes::value::OrderedFloat; @@ -692,7 +694,6 @@ mod tests { use crate::ast::TimezoneInfo; use crate::statements::create::ColumnExtensions; use crate::statements::ColumnOption; - use crate::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; fn check_type(sql_type: SqlDataType, data_type: ConcreteDataType) { assert_eq!( diff --git a/src/sql/src/statements/alter.rs b/src/sql/src/statements/alter.rs index a32f7b55cdf2..29efb3f568ea 100644 --- a/src/sql/src/statements/alter.rs +++ b/src/sql/src/statements/alter.rs @@ -16,6 +16,7 @@ use std::fmt::{Debug, Display}; use api::v1; use common_query::AddColumnLocation; +use datatypes::schema::FulltextOptions; use itertools::Itertools; use sqlparser::ast::{ColumnDef, DataType, Ident, ObjectName, TableConstraint}; use sqlparser_derive::{Visit, VisitMut}; @@ -75,6 +76,11 @@ pub enum AlterTableOperation { DropColumn { name: Ident }, /// `RENAME ` RenameTable { new_table_name: String }, + /// `MODIFY COLUMN SET FULLTEXT [WITH ]` + ChangeColumnFulltext { + column_name: Ident, + options: FulltextOptions, + }, } impl Display for AlterTableOperation { @@ -117,6 +123,13 @@ impl Display for AlterTableOperation { Ok(()) } + AlterTableOperation::ChangeColumnFulltext { + column_name, + options, + } => write!( + f, + r#"MODIFY COLUMN {column_name} SET FULLTEXT WITH({options})"#, + ), } } } @@ -229,5 +242,26 @@ ALTER TABLE monitor RENAME monitor_new"#, unreachable!(); } } + + let sql = "ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT WITH(enable='true',analyzer='English',case_sensitive='false')"; + let stmts = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + assert_eq!(1, stmts.len()); + assert_matches!(&stmts[0], Statement::Alter { .. }); + + match &stmts[0] { + Statement::Alter(set) => { + let new_sql = format!("\n{}", set); + assert_eq!( + r#" +ALTER TABLE monitor MODIFY COLUMN a SET FULLTEXT WITH(enable=true, analyzer=English, case_sensitive=false)"#, + &new_sql + ); + } + _ => { + unreachable!(); + } + } } } diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index bba7c666bf4e..e376be67d026 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -12,19 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; use std::fmt::{Display, Formatter}; use common_catalog::consts::FILE_ENGINE; -use datatypes::schema::{FulltextAnalyzer, FulltextOptions}; +use datatypes::schema::FulltextOptions; use itertools::Itertools; +use snafu::ResultExt; use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue}; -use crate::error::{FulltextInvalidOptionSnafu, Result}; +use crate::error::{Result, SetFulltextOptionSnafu}; use crate::statements::statement::Statement; use crate::statements::OptionMap; -use crate::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; const LINE_SEP: &str = ",\n"; const COMMA_SEP: &str = ", "; @@ -156,36 +157,8 @@ impl ColumnExtensions { return Ok(None); }; - let mut fulltext = FulltextOptions { - enable: true, - ..Default::default() - }; - if let Some(analyzer) = options.get(COLUMN_FULLTEXT_OPT_KEY_ANALYZER) { - match analyzer.to_ascii_lowercase().as_str() { - "english" => fulltext.analyzer = FulltextAnalyzer::English, - "chinese" => fulltext.analyzer = FulltextAnalyzer::Chinese, - _ => { - return FulltextInvalidOptionSnafu { - msg: format!("{analyzer}, expected: 'English' | 'Chinese'"), - } - .fail(); - } - } - } - if let Some(case_sensitive) = options.get(COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE) { - match case_sensitive.to_ascii_lowercase().as_str() { - "true" => fulltext.case_sensitive = true, - "false" => fulltext.case_sensitive = false, - _ => { - return FulltextInvalidOptionSnafu { - msg: format!("{case_sensitive}, expected: 'true' | 'false'"), - } - .fail(); - } - } - } - - Ok(Some(fulltext)) + let options: HashMap = options.clone().into_map(); + Ok(Some(options.try_into().context(SetFulltextOptionSnafu)?)) } } diff --git a/src/store-api/Cargo.toml b/src/store-api/Cargo.toml index 2d11b31ebe45..7c974661e315 100644 --- a/src/store-api/Cargo.toml +++ b/src/store-api/Cargo.toml @@ -23,6 +23,7 @@ datatypes.workspace = true derive_builder.workspace = true futures.workspace = true humantime.workspace = true +prost.workspace = true serde.workspace = true serde_json.workspace = true snafu.workspace = true diff --git a/src/store-api/src/metadata.rs b/src/store-api/src/metadata.rs index 936a20a7c3cb..bb68df458332 100644 --- a/src/store-api/src/metadata.rs +++ b/src/store-api/src/metadata.rs @@ -28,7 +28,7 @@ use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; use datatypes::arrow::datatypes::FieldRef; -use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; +use datatypes::schema::{ColumnSchema, FulltextOptions, Schema, SchemaRef}; use serde::de::Error; use serde::{Deserialize, Deserializer, Serialize}; use snafu::{ensure, Location, OptionExt, ResultExt, Snafu}; @@ -553,6 +553,10 @@ impl RegionMetadataBuilder { AlterKind::AddColumns { columns } => self.add_columns(columns)?, AlterKind::DropColumns { names } => self.drop_columns(&names), AlterKind::ChangeColumnTypes { columns } => self.change_column_types(columns), + AlterKind::ChangeColumnFulltext { + column_name, + options, + } => self.change_column_fulltext_options(column_name, options)?, AlterKind::ChangeRegionOptions { options: _ } => { // nothing to be done with RegionMetadata } @@ -655,6 +659,47 @@ impl RegionMetadataBuilder { } } } + + fn change_column_fulltext_options( + &mut self, + column_name: String, + options: FulltextOptions, + ) -> Result<()> { + for column_meta in self.column_metadatas.iter_mut() { + if column_meta.column_schema.name == column_name { + ensure!( + column_meta.column_schema.data_type.is_string(), + InvalidColumnOptionSnafu { + column_name, + msg: "FULLTEXT index only supports string type".to_string(), + } + ); + + let current_fulltext_options = column_meta + .column_schema + .fulltext_options() + .context(SetFulltextOptionsSnafu { + column_name: column_name.clone(), + })?; + + // Don't allow to enable fulltext options if it is already enabled. + if current_fulltext_options.is_some_and(|o| o.enable) && options.enable { + return InvalidColumnOptionSnafu { + column_name, + msg: "FULLTEXT index options already enabled".to_string(), + } + .fail(); + } else { + column_meta + .column_schema + .set_fulltext_options(&options) + .context(SetFulltextOptionsSnafu { column_name })?; + } + break; + } + } + Ok(()) + } } /// Fields skipped in serialization. @@ -779,6 +824,30 @@ pub enum MetadataError { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to decode protobuf"))] + DecodeProto { + #[snafu(source)] + error: prost::DecodeError, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Invalid column option, column name: {}, error: {}", column_name, msg))] + InvalidColumnOption { + column_name: String, + msg: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to set fulltext options for column {}", column_name))] + SetFulltextOptions { + column_name: String, + source: datatypes::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for MetadataError { @@ -1211,6 +1280,32 @@ mod test { .column_schema .data_type; assert_eq!(ConcreteDataType::string_datatype(), *b_type); + + let mut builder = RegionMetadataBuilder::from_existing(metadata); + builder + .alter(AlterKind::ChangeColumnFulltext { + column_name: "b".to_string(), + options: FulltextOptions { + enable: true, + analyzer: datatypes::schema::FulltextAnalyzer::Chinese, + case_sensitive: true, + }, + }) + .unwrap(); + let metadata = builder.build().unwrap(); + let a_fulltext_options = metadata + .column_by_name("b") + .unwrap() + .column_schema + .fulltext_options() + .unwrap() + .unwrap(); + assert!(a_fulltext_options.enable); + assert_eq!( + datatypes::schema::FulltextAnalyzer::Chinese, + a_fulltext_options.analyzer + ); + assert!(a_fulltext_options.case_sensitive); } #[test] diff --git a/src/store-api/src/region_request.rs b/src/store-api/src/region_request.rs index 867f23175445..067d9e78a388 100644 --- a/src/store-api/src/region_request.rs +++ b/src/store-api/src/region_request.rs @@ -18,23 +18,25 @@ use std::time::Duration; use api::helper::ColumnDataTypeWrapper; use api::v1::add_column_location::LocationType; -use api::v1::region::alter_request::Kind; +use api::v1::column_def::as_fulltext_option; use api::v1::region::{ alter_request, compact_request, region_request, AlterRequest, AlterRequests, CloseRequest, CompactRequest, CreateRequest, CreateRequests, DeleteRequests, DropRequest, DropRequests, FlushRequest, InsertRequests, OpenRequest, TruncateRequest, }; -use api::v1::{self, ChangeTableOption, Rows, SemanticType}; +use api::v1::{self, Analyzer, ChangeTableOption, Rows, SemanticType}; pub use common_base::AffectedRows; use datatypes::data_type::ConcreteDataType; +use datatypes::schema::FulltextOptions; use serde::{Deserialize, Serialize}; -use snafu::{ensure, OptionExt}; +use snafu::{ensure, OptionExt, ResultExt}; use strum::IntoStaticStr; use crate::logstore::entry; use crate::metadata::{ - ColumnMetadata, InvalidRawRegionRequestSnafu, InvalidRegionOptionChangeRequestSnafu, - InvalidRegionRequestSnafu, MetadataError, RegionMetadata, Result, + ColumnMetadata, DecodeProtoSnafu, InvalidRawRegionRequestSnafu, + InvalidRegionOptionChangeRequestSnafu, InvalidRegionRequestSnafu, MetadataError, + RegionMetadata, Result, }; use crate::mito_engine_options::TTL_KEY; use crate::path_utils::region_dir; @@ -395,6 +397,11 @@ pub enum AlterKind { }, /// Change region options. ChangeRegionOptions { options: Vec }, + /// Change fulltext index options. + ChangeColumnFulltext { + column_name: String, + options: FulltextOptions, + }, } impl AlterKind { @@ -419,6 +426,9 @@ impl AlterKind { } } AlterKind::ChangeRegionOptions { .. } => {} + AlterKind::ChangeColumnFulltext { column_name, .. } => { + Self::validate_column_fulltext_option(column_name, metadata)?; + } } Ok(()) } @@ -441,6 +451,9 @@ impl AlterKind { // todo: we need to check if ttl has ever changed. true } + AlterKind::ChangeColumnFulltext { column_name, .. } => { + metadata.column_by_name(column_name).is_some() + } } } @@ -458,6 +471,32 @@ impl AlterKind { ); Ok(()) } + + /// Returns an error if the column to change fulltext index option is invalid. + fn validate_column_fulltext_option( + column_name: &String, + metadata: &RegionMetadata, + ) -> Result<()> { + let column = metadata + .column_by_name(column_name) + .context(InvalidRegionRequestSnafu { + region_id: metadata.region_id, + err: format!("column {} not found", column_name), + })?; + + ensure!( + column.column_schema.data_type.is_string(), + InvalidRegionRequestSnafu { + region_id: metadata.region_id, + err: format!( + "cannot change fulltext index options for non-string column {}", + column_name + ), + } + ); + + Ok(()) + } } impl TryFrom for AlterKind { @@ -485,12 +524,24 @@ impl TryFrom for AlterKind { let names = x.drop_columns.into_iter().map(|x| x.name).collect(); AlterKind::DropColumns { names } } - Kind::ChangeTableOptions(change_options) => AlterKind::ChangeRegionOptions { - options: change_options - .change_table_options - .iter() - .map(TryFrom::try_from) - .collect::>>()?, + alter_request::Kind::ChangeTableOptions(change_options) => { + AlterKind::ChangeRegionOptions { + options: change_options + .change_table_options + .iter() + .map(TryFrom::try_from) + .collect::>>()?, + } + } + alter_request::Kind::ChangeColumnFulltext(x) => AlterKind::ChangeColumnFulltext { + column_name: x.column_name.clone(), + options: FulltextOptions { + enable: x.enable, + analyzer: as_fulltext_option( + Analyzer::try_from(x.analyzer).context(DecodeProtoSnafu)?, + ), + case_sensitive: x.case_sensitive, + }, }, }; @@ -743,7 +794,7 @@ mod tests { use api::v1::region::RegionColumnDef; use api::v1::{ColumnDataType, ColumnDef}; use datatypes::prelude::ConcreteDataType; - use datatypes::schema::ColumnSchema; + use datatypes::schema::{ColumnSchema, FulltextAnalyzer}; use super::*; use crate::metadata::RegionMetadataBuilder; @@ -1137,4 +1188,23 @@ mod tests { assert!(create.validate().is_err()); } + + #[test] + fn test_validate_change_column_fulltext_options() { + let kind = AlterKind::ChangeColumnFulltext { + column_name: "tag_0".to_string(), + options: FulltextOptions { + enable: true, + analyzer: FulltextAnalyzer::Chinese, + case_sensitive: false, + }, + }; + let request = RegionAlterRequest { + schema_version: 1, + kind, + }; + let mut metadata = new_metadata(); + metadata.schema_version = 1; + request.validate(&metadata).unwrap(); + } } diff --git a/src/table/src/error.rs b/src/table/src/error.rs index e6ea1e19325f..344140680f35 100644 --- a/src/table/src/error.rs +++ b/src/table/src/error.rs @@ -140,6 +140,22 @@ pub enum Error { #[snafu(display("Table options value is not valid, key: `{}`, value: `{}`", key, value))] InvalidTableOptionValue { key: String, value: String }, + + #[snafu(display("Invalid column option, column name: {}, error: {}", column_name, msg))] + InvalidColumnOption { + column_name: String, + msg: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to set fulltext options for column {}", column_name))] + SetFulltextOptions { + column_name: String, + source: datatypes::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -154,8 +170,11 @@ impl ErrorExt for Error { | Error::InvalidAlterRequest { .. } => StatusCode::InvalidArguments, Error::TablesRecordBatch { .. } => StatusCode::Unexpected, Error::ColumnExists { .. } => StatusCode::TableColumnExists, - Error::SchemaBuild { source, .. } => source.status_code(), + Error::SchemaBuild { source, .. } | Error::SetFulltextOptions { source, .. } => { + source.status_code() + } Error::TableOperation { source } => source.status_code(), + Error::InvalidColumnOption { .. } => StatusCode::InvalidArguments, Error::ColumnNotExists { .. } => StatusCode::TableColumnNotFound, Error::Unsupported { .. } => StatusCode::Unsupported, Error::ParseTableOption { .. } => StatusCode::InvalidArguments, diff --git a/src/table/src/metadata.rs b/src/table/src/metadata.rs index 2b56e214cb77..d2045d302aa7 100644 --- a/src/table/src/metadata.rs +++ b/src/table/src/metadata.rs @@ -20,7 +20,9 @@ use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use common_query::AddColumnLocation; use datafusion_expr::TableProviderFilterPushDown; pub use datatypes::error::{Error as ConvertError, Result as ConvertResult}; -use datatypes::schema::{ColumnSchema, RawSchema, Schema, SchemaBuilder, SchemaRef}; +use datatypes::schema::{ + ColumnSchema, FulltextOptions, RawSchema, Schema, SchemaBuilder, SchemaRef, +}; use derive_builder::Builder; use serde::{Deserialize, Serialize}; use snafu::{ensure, OptionExt, ResultExt}; @@ -203,6 +205,10 @@ impl TableMeta { // No need to rebuild table meta when renaming tables. AlterKind::RenameTable { .. } => Ok(self.new_meta_builder()), AlterKind::ChangeTableOptions { options } => self.change_table_options(options), + AlterKind::ChangeColumnFulltext { + column_name, + options, + } => self.change_column_fulltext_options(table_name, column_name, options), } } @@ -227,6 +233,80 @@ impl TableMeta { Ok(builder) } + /// Creates a [TableMetaBuilder] with modified column fulltext options. + fn change_column_fulltext_options( + &self, + table_name: &str, + column_name: &str, + options: &FulltextOptions, + ) -> Result { + let table_schema = &self.schema; + let mut meta_builder = self.new_meta_builder(); + + let column = &table_schema + .column_schema_by_name(column_name) + .with_context(|| error::ColumnNotExistsSnafu { + column_name, + table_name, + })?; + + ensure!( + column.data_type.is_string(), + error::InvalidColumnOptionSnafu { + column_name, + msg: "FULLTEXT index only supports string type", + } + ); + + let current_fulltext_options = column + .fulltext_options() + .context(error::SetFulltextOptionsSnafu { column_name })?; + + ensure!( + !(current_fulltext_options.is_some_and(|o| o.enable) && options.enable), + error::InvalidColumnOptionSnafu { + column_name, + msg: "FULLTEXT index options already enabled", + } + ); + + let mut columns = Vec::with_capacity(table_schema.column_schemas().len()); + for column_schema in table_schema.column_schemas() { + if column_schema.name == column_name { + let mut new_column_schema = column_schema.clone(); + new_column_schema + .set_fulltext_options(options) + .context(error::SetFulltextOptionsSnafu { column_name })?; + columns.push(new_column_schema); + } else { + columns.push(column_schema.clone()); + } + } + + // TODO(CookiePieWw): This part for all alter table operations is similar. We can refactor it. + let mut builder = SchemaBuilder::try_from_columns(columns) + .with_context(|_| error::SchemaBuildSnafu { + msg: format!("Failed to convert column schemas into schema for table {table_name}"), + })? + .version(table_schema.version() + 1); + + for (k, v) in table_schema.metadata().iter() { + builder = builder.add_metadata(k, v); + } + + let new_schema = builder.build().with_context(|_| error::SchemaBuildSnafu { + msg: format!( + "Table {table_name} cannot change fulltext options for column {column_name}", + ), + })?; + + let _ = meta_builder + .schema(Arc::new(new_schema)) + .primary_key_indices(self.primary_key_indices.clone()); + + Ok(meta_builder) + } + /// Allocate a new column for the table. /// /// This method would bump the `next_column_id` of the meta. @@ -1327,4 +1407,58 @@ mod tests { assert_eq!(&[0, 1], &new_meta.primary_key_indices[..]); assert_eq!(&[2, 3, 4], &new_meta.value_indices[..]); } + + #[test] + fn test_alter_column_fulltext_options() { + let schema = Arc::new(new_test_schema()); + let meta = TableMetaBuilder::default() + .schema(schema) + .primary_key_indices(vec![0]) + .engine("engine") + .next_column_id(3) + .build() + .unwrap(); + + let alter_kind = AlterKind::ChangeColumnFulltext { + column_name: "col1".to_string(), + options: FulltextOptions::default(), + }; + let err = meta + .builder_with_alter_kind("my_table", &alter_kind, false) + .err() + .unwrap(); + assert_eq!( + "Invalid column option, column name: col1, error: FULLTEXT index only supports string type", + err.to_string() + ); + + // Add a string column and make it fulltext indexed + let new_meta = add_columns_to_meta_with_location(&meta); + assert_eq!(meta.region_numbers, new_meta.region_numbers); + + let alter_kind = AlterKind::ChangeColumnFulltext { + column_name: "my_tag_first".to_string(), + options: FulltextOptions { + enable: true, + analyzer: datatypes::schema::FulltextAnalyzer::Chinese, + case_sensitive: true, + }, + }; + let new_meta = new_meta + .builder_with_alter_kind("my_table", &alter_kind, false) + .unwrap() + .build() + .unwrap(); + let column_schema = new_meta + .schema + .column_schema_by_name("my_tag_first") + .unwrap(); + let fulltext_options = column_schema.fulltext_options().unwrap().unwrap(); + assert!(fulltext_options.enable); + assert_eq!( + datatypes::schema::FulltextAnalyzer::Chinese, + fulltext_options.analyzer + ); + assert!(fulltext_options.case_sensitive); + } } diff --git a/src/table/src/requests.rs b/src/table/src/requests.rs index a4fcce7bcdcf..d37bd7ca7326 100644 --- a/src/table/src/requests.rs +++ b/src/table/src/requests.rs @@ -25,7 +25,7 @@ use common_query::AddColumnLocation; use common_time::range::TimestampRange; use datatypes::data_type::ConcreteDataType; use datatypes::prelude::VectorRef; -use datatypes::schema::ColumnSchema; +use datatypes::schema::{ColumnSchema, FulltextOptions}; use greptime_proto::v1::region::compact_request; use serde::{Deserialize, Serialize}; use store_api::metric_engine_consts::{LOGICAL_TABLE_METADATA_KEY, PHYSICAL_TABLE_METADATA_KEY}; @@ -216,6 +216,10 @@ pub enum AlterKind { ChangeTableOptions { options: Vec, }, + ChangeColumnFulltext { + column_name: String, + options: FulltextOptions, + }, } // #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/tests/cases/standalone/common/alter/change_col_fulltext_options.result b/tests/cases/standalone/common/alter/change_col_fulltext_options.result new file mode 100644 index 000000000000..f9af6e9288e2 --- /dev/null +++ b/tests/cases/standalone/common/alter/change_col_fulltext_options.result @@ -0,0 +1,179 @@ +CREATE TABLE `test` ( + `message` STRING, + `time` TIMESTAMP TIME INDEX, +) WITH ( + append_mode = 'true' +); + +Affected Rows: 0 + +SHOW CREATE TABLE test; + ++-------+-------------------------------------+ +| Table | Create Table | ++-------+-------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL, | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | ++-------+-------------------------------------+ + +-- Write/read after altering column fulltext options +INSERT INTO test VALUES ('hello', '2020-01-01 00:00:00'), +('world', '2020-01-01 00:00:01'), +('hello world', '2020-01-02 00:00:00'), +('world hello', '2020-01-02 00:00:01'); + +Affected Rows: 4 + +SELECT * FROM test WHERE MATCHES(message, 'hello'); + ++-------------+---------------------+ +| message | time | ++-------------+---------------------+ +| hello | 2020-01-01T00:00:00 | +| hello world | 2020-01-02T00:00:00 | +| world hello | 2020-01-02T00:00:01 | ++-------------+---------------------+ + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'true'); + +Affected Rows: 0 + +SELECT * FROM test WHERE MATCHES(message, 'hello'); + ++-------------+---------------------+ +| message | time | ++-------------+---------------------+ +| hello | 2020-01-01T00:00:00 | +| hello world | 2020-01-02T00:00:00 | +| world hello | 2020-01-02T00:00:01 | ++-------------+---------------------+ + +INSERT INTO test VALUES ('hello NiKo', '2020-01-03 00:00:00'), +('NiKo hello', '2020-01-03 00:00:01'), +('hello hello', '2020-01-04 00:00:00'), +('NiKo, NiKo', '2020-01-04 00:00:01'); + +Affected Rows: 4 + +SELECT * FROM test WHERE MATCHES(message, 'hello'); + ++-------------+---------------------+ +| message | time | ++-------------+---------------------+ +| hello NiKo | 2020-01-03T00:00:00 | +| NiKo hello | 2020-01-03T00:00:01 | +| hello hello | 2020-01-04T00:00:00 | +| hello | 2020-01-01T00:00:00 | +| hello world | 2020-01-02T00:00:00 | +| world hello | 2020-01-02T00:00:01 | ++-------------+---------------------+ + +-- SQLNESS ARG restart=true +SHOW CREATE TABLE test; + ++-------+---------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+---------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'true'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | ++-------+---------------------------------------------------------------------------------------+ + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(enable = 'false'); + +Affected Rows: 0 + +SHOW CREATE TABLE test; + ++-------+-------------------------------------+ +| Table | Create Table | ++-------+-------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL, | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | ++-------+-------------------------------------+ + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'English', case_sensitive = 'true'); + +Affected Rows: 0 + +SHOW CREATE TABLE test; + ++-------+---------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+---------------------------------------------------------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'true'), | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | ++-------+---------------------------------------------------------------------------------------+ + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'false'); + +Error: 1004(InvalidArguments), Invalid column option, column name: message, error: FULLTEXT index options already enabled + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(enable = 'false'); + +Affected Rows: 0 + +SHOW CREATE TABLE test; + ++-------+-------------------------------------+ +| Table | Create Table | ++-------+-------------------------------------+ +| test | CREATE TABLE IF NOT EXISTS "test" ( | +| | "message" STRING NULL, | +| | "time" TIMESTAMP(3) NOT NULL, | +| | TIME INDEX ("time") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | append_mode = 'true' | +| | ) | ++-------+-------------------------------------+ + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'Chinglish', case_sensitive = 'false'); + +Error: 1002(Unexpected), Invalid fulltext option: Chinglish, expected: 'English' | 'Chinese' + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'no'); + +Error: 1002(Unexpected), Invalid fulltext option: no, expected: 'true' | 'false' + +ALTER TABLE test MODIFY COLUMN time SET FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'false'); + +Error: 1004(InvalidArguments), Invalid column option, column name: time, error: FULLTEXT index only supports string type + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/alter/change_col_fulltext_options.sql b/tests/cases/standalone/common/alter/change_col_fulltext_options.sql new file mode 100644 index 000000000000..82b25235b779 --- /dev/null +++ b/tests/cases/standalone/common/alter/change_col_fulltext_options.sql @@ -0,0 +1,52 @@ +CREATE TABLE `test` ( + `message` STRING, + `time` TIMESTAMP TIME INDEX, +) WITH ( + append_mode = 'true' +); + +SHOW CREATE TABLE test; + +-- Write/read after altering column fulltext options +INSERT INTO test VALUES ('hello', '2020-01-01 00:00:00'), +('world', '2020-01-01 00:00:01'), +('hello world', '2020-01-02 00:00:00'), +('world hello', '2020-01-02 00:00:01'); + +SELECT * FROM test WHERE MATCHES(message, 'hello'); + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'true'); + +SELECT * FROM test WHERE MATCHES(message, 'hello'); + +INSERT INTO test VALUES ('hello NiKo', '2020-01-03 00:00:00'), +('NiKo hello', '2020-01-03 00:00:01'), +('hello hello', '2020-01-04 00:00:00'), +('NiKo, NiKo', '2020-01-04 00:00:01'); + +SELECT * FROM test WHERE MATCHES(message, 'hello'); + +-- SQLNESS ARG restart=true +SHOW CREATE TABLE test; + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(enable = 'false'); + +SHOW CREATE TABLE test; + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'English', case_sensitive = 'true'); + +SHOW CREATE TABLE test; + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'false'); + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(enable = 'false'); + +SHOW CREATE TABLE test; + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'Chinglish', case_sensitive = 'false'); + +ALTER TABLE test MODIFY COLUMN message SET FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'no'); + +ALTER TABLE test MODIFY COLUMN time SET FULLTEXT WITH(analyzer = 'Chinese', case_sensitive = 'false'); + +DROP TABLE test;