diff --git a/parser/ast.go b/parser/ast.go
index 67931d4..ea57de9 100644
--- a/parser/ast.go
+++ b/parser/ast.go
@@ -3049,6 +3049,93 @@ func (f *FormatExpr) String(level int) string {
 	return "FORMAT " + f.Format.String(level)
 }
 
+// OptimizeExpr is the AST node for an OPTIMIZE TABLE statement.
+type OptimizeExpr struct {
+	// OptimizePos and StatementEnd bound the statement; Pos and End return
+	// them unchanged.
+	OptimizePos  Pos
+	StatementEnd Pos
+	// Table names the target table. String dereferences it unconditionally.
+	Table *TableIdentifier
+	// OnCluster and Partition are optional; String skips whichever is nil.
+	OnCluster *OnClusterExpr
+	Partition *PartitionExpr
+	// HasFinal makes String emit the FINAL modifier.
+	HasFinal bool
+	// Deduplicate is optional; String skips it when nil.
+	Deduplicate *DeduplicateExpr
+}
+
+// Pos returns the position stored in OptimizePos.
+func (o *OptimizeExpr) Pos() Pos { return o.OptimizePos }
+
+// End returns the position stored in StatementEnd.
+func (o *OptimizeExpr) End() Pos { return o.StatementEnd }
+
+// String renders the statement as SQL. The OnCluster and Partition clauses
+// are each preceded by NewLine(level); FINAL and the Deduplicate clause are
+// appended inline.
+func (o *OptimizeExpr) String(level int) string {
+	var sql strings.Builder
+	sql.WriteString("OPTIMIZE TABLE " + o.Table.String(level))
+	if cluster := o.OnCluster; cluster != nil {
+		sql.WriteString(NewLine(level) + cluster.String(level))
+	}
+	if partition := o.Partition; partition != nil {
+		sql.WriteString(NewLine(level) + partition.String(level))
+	}
+	if o.HasFinal {
+		sql.WriteString(" FINAL")
+	}
+	if dedup := o.Deduplicate; dedup != nil {
+		// DeduplicateExpr.String supplies its own leading space.
+		sql.WriteString(dedup.String(level))
+	}
+	return sql.String()
+}
+
+// DeduplicateExpr is the DEDUPLICATE clause with its optional BY and EXCEPT
+// column lists.
+type DeduplicateExpr struct {
+	DeduplicatePos Pos
+	By             *ColumnExprList
+	Except         *ColumnExprList
+}
+
+// Pos returns the position stored in DeduplicatePos.
+func (d *DeduplicateExpr) Pos() Pos { return d.DeduplicatePos }
+
+// End returns By's end when By is set, otherwise Except's end when Except is
+// set, otherwise DeduplicatePos advanced by len(KeywordDeduplicate).
+//
+// NOTE(review): By wins even when Except is also set, although String renders
+// EXCEPT after BY, so the reported end may stop short of the EXCEPT list.
+// Confirm whether that is intended.
+func (d *DeduplicateExpr) End() Pos {
+	switch {
+	case d.By != nil:
+		return d.By.End()
+	case d.Except != nil:
+		return d.Except.End()
+	default:
+		return d.DeduplicatePos + Pos(len(KeywordDeduplicate))
+	}
+}
+
+// String renders the clause as SQL with a leading space, so it can be
+// appended directly after the preceding text.
+func (d *DeduplicateExpr) String(level int) string {
+	var sql strings.Builder
+	sql.WriteString(" DEDUPLICATE")
+	if d.By != nil {
+		sql.WriteString(" BY " + d.By.String(level))
+	}
+	if d.Except != nil {
+		sql.WriteString(" EXCEPT " + d.Except.String(level))
+	}
+	return sql.String()
+}
+
 type SystemExpr struct {
 	SystemPos Pos
 	Expr Expr
diff --git a/parser/keyword.go b/parser/keyword.go
index 289bb1e..ee5718e 100644
--- 
a/parser/keyword.go
+++ b/parser/keyword.go
@@ -60,6 +60,7 @@ const (
 	KeywordEnd = "END"
 	KeywordEngine = "ENGINE"
 	KeywordEvents = "EVENTS"
+	KeywordExcept = "EXCEPT"
 	KeywordExists = "EXISTS"
 	KeywordExplain = "EXPLAIN"
 	KeywordExpression = "EXPRESSION"
@@ -261,6 +262,7 @@ var keywords = NewSet(
 	KeywordEnd,
 	KeywordEngine,
 	KeywordEvents,
+	KeywordExcept,
 	KeywordExists,
 	KeywordExplain,
 	KeywordExpression,
diff --git a/parser/parse_system.go b/parser/parse_system.go
index ece6de1..f16a161 100644
--- a/parser/parse_system.go
+++ b/parser/parse_system.go
@@ -198,6 +198,104 @@ func (p *Parser) parseSystemDropExpr(pos Pos) (*SystemDropExpr, error) {
 	}
 }
 
+// tryParseDeduplicateExpr parses a DEDUPLICATE clause when the parser is
+// positioned on the DEDUPLICATE keyword, and returns (nil, nil) otherwise.
+func (p *Parser) tryParseDeduplicateExpr(pos Pos) (*DeduplicateExpr, error) {
+	if !p.matchKeyword(KeywordDeduplicate) {
+		return nil, nil
+	}
+	return p.parseDeduplicateExpr(pos)
+}
+
+// parseDeduplicateExpr parses DEDUPLICATE [BY columns [EXCEPT columns]].
+// pos is stored as the clause's DeduplicatePos. EXCEPT is only looked for
+// after a BY list has been parsed.
+func (p *Parser) parseDeduplicateExpr(pos Pos) (*DeduplicateExpr, error) {
+	if err := p.consumeKeyword(KeywordDeduplicate); err != nil {
+		return nil, err
+	}
+
+	dedup := &DeduplicateExpr{DeduplicatePos: pos}
+	if p.tryConsumeKeyword(KeywordBy) == nil {
+		// Bare DEDUPLICATE: no column lists follow.
+		return dedup, nil
+	}
+
+	var err error
+	if dedup.By, err = p.parseColumnExprList(p.Pos()); err != nil {
+		return nil, err
+	}
+	if p.tryConsumeKeyword(KeywordExcept) != nil {
+		if dedup.Except, err = p.parseColumnExprList(p.Pos()); err != nil {
+			return nil, err
+		}
+	}
+	return dedup, nil
+}
+
+// parseOptimizeExpr parses an OPTIMIZE TABLE statement: the table identifier
+// followed by an optional on-cluster clause, an optional partition clause, an
+// optional FINAL keyword and an optional DEDUPLICATE clause, in that order.
+// pos is stored as OptimizePos; StatementEnd is the end of the last part seen.
+func (p *Parser) parseOptimizeExpr(pos Pos) (*OptimizeExpr, error) {
+	if err := p.consumeKeyword(KeywordOptimize); err != nil {
+		return nil, err
+	}
+	if err := p.consumeKeyword(KeywordTable); err != nil {
+		return nil, err
+	}
+
+	table, err := p.parseTableIdentifier(p.Pos())
+	if err != nil {
+		return nil, err
+	}
+	statementEnd := table.End()
+
+	onCluster, err := p.tryParseOnCluster(p.Pos())
+	if err != nil {
+		return nil, err
+	}
+	if onCluster != nil {
+		statementEnd = onCluster.End()
+	}
+
+	partitionExpr, err := p.tryParsePartitionExpr(p.Pos())
+	if err != nil {
+		return nil, err
+	}
+	if partitionExpr != nil {
+		statementEnd = partitionExpr.End()
+	}
+
+	// Sample the position before consuming, so it marks the start of FINAL.
+	finalPos := p.Pos()
+	hasFinal := p.tryConsumeKeyword(KeywordFinal) != nil
+	if hasFinal {
+		// The statement ends just past the keyword, not at its first
+		// character; DeduplicateExpr.End offsets by the keyword length in the
+		// same way.
+		statementEnd = finalPos + Pos(len(KeywordFinal))
+	}
+
+	deduplicate, err := p.tryParseDeduplicateExpr(p.Pos())
+	if err != nil {
+		return nil, err
+	}
+	if deduplicate != nil {
+		statementEnd = deduplicate.End()
+	}
+
+	return &OptimizeExpr{
+		OptimizePos:  pos,
+		StatementEnd: statementEnd,
+		Table:        table,
+		OnCluster:    onCluster,
+		Partition:    partitionExpr,
+		HasFinal:     hasFinal,
+		Deduplicate:  deduplicate,
+	}, nil
+}
+
 func (p *Parser) parseSystemExpr(pos Pos) (*SystemExpr, error) {
 	if err := p.consumeKeyword(KeywordSystem); err != nil {
 		return nil, err
diff --git a/parser/parser_table.go b/parser/parser_table.go
index 7162855..f13196a 100644
--- a/parser/parser_table.go
+++ b/parser/parser_table.go
@@ -904,6 +904,8 @@ func (p *Parser) parseStatement(pos Pos) (Expr, error) {
 		expr, err = p.parseSetExpr(pos)
 	case p.matchKeyword(KeywordSystem):
 		expr, err = p.parseSystemExpr(pos)
+	case p.matchKeyword(KeywordOptimize):
+		expr, err = p.parseOptimizeExpr(pos)
 	default:
 		return nil, fmt.Errorf("unexpected token: %q", p.last().String)
 	}
diff --git a/parser/testdata/ddl/format/optimize.sql b/parser/testdata/ddl/format/optimize.sql
new file mode 100644
index 0000000..bf6100e
--- /dev/null
+++ b/parser/testdata/ddl/format/optimize.sql
@@ -0,0 +1,19 @@
+-- Origin SQL:
+OPTIMIZE TABLE table DEDUPLICATE; -- all columns
+OPTIMIZE TABLE table DEDUPLICATE BY *; -- excludes MATERIALIZED and ALIAS columns
+OPTIMIZE TABLE table DEDUPLICATE BY colX,colY,colZ;
+OPTIMIZE TABLE table DEDUPLICATE BY * EXCEPT colX;
+OPTIMIZE TABLE table DEDUPLICATE BY * EXCEPT (colX, colY);
+OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex');
+OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT colX;
+OPTIMIZE TABLE table DEDUPLICATE BY 
COLUMNS('column-matched-by-regex') EXCEPT (colX, colY); + +-- Format SQL: +OPTIMIZE TABLE table DEDUPLICATE; +OPTIMIZE TABLE table DEDUPLICATE BY *; +OPTIMIZE TABLE table DEDUPLICATE BY colX, colY, colZ; +OPTIMIZE TABLE table DEDUPLICATE BY * EXCEPT colX; +OPTIMIZE TABLE table DEDUPLICATE BY * EXCEPT (colX, colY); +OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex'); +OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT colX; +OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT (colX, colY); diff --git a/parser/testdata/ddl/optimize.sql b/parser/testdata/ddl/optimize.sql new file mode 100644 index 0000000..a838fa7 --- /dev/null +++ b/parser/testdata/ddl/optimize.sql @@ -0,0 +1,8 @@ +OPTIMIZE TABLE table DEDUPLICATE; -- all columns +OPTIMIZE TABLE table DEDUPLICATE BY *; -- excludes MATERIALIZED and ALIAS columns +OPTIMIZE TABLE table DEDUPLICATE BY colX,colY,colZ; +OPTIMIZE TABLE table DEDUPLICATE BY * EXCEPT colX; +OPTIMIZE TABLE table DEDUPLICATE BY * EXCEPT (colX, colY); +OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex'); +OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT colX; +OPTIMIZE TABLE table DEDUPLICATE BY COLUMNS('column-matched-by-regex') EXCEPT (colX, colY); \ No newline at end of file diff --git a/parser/testdata/ddl/output/optimize.sql.golden.json b/parser/testdata/ddl/output/optimize.sql.golden.json new file mode 100644 index 0000000..a8b8ac6 --- /dev/null +++ b/parser/testdata/ddl/output/optimize.sql.golden.json @@ -0,0 +1,391 @@ +[ + { + "OptimizePos": 0, + "StatementEnd": 32, + "Table": { + "Database": null, + "Table": { + "Name": "table", + "Unquoted": false, + "NamePos": 15, + "NameEnd": 20 + } + }, + "OnCluster": null, + "Partition": null, + "HasFinal": false, + "Deduplicate": { + "DeduplicatePos": 21, + "By": null, + "Except": null + } + }, + { + "OptimizePos": 49, + "StatementEnd": 85, + "Table": { + "Database": null, + 
"Table": { + "Name": "table", + "Unquoted": false, + "NamePos": 64, + "NameEnd": 69 + } + }, + "OnCluster": null, + "Partition": null, + "HasFinal": false, + "Deduplicate": { + "DeduplicatePos": 70, + "By": { + "ListPos": 85, + "ListEnd": 85, + "Items": [ + { + "Name": "*", + "Unquoted": false, + "NamePos": 85, + "NameEnd": 85 + } + ] + }, + "Except": null + } + }, + { + "OptimizePos": 131, + "StatementEnd": 181, + "Table": { + "Database": null, + "Table": { + "Name": "table", + "Unquoted": false, + "NamePos": 146, + "NameEnd": 151 + } + }, + "OnCluster": null, + "Partition": null, + "HasFinal": false, + "Deduplicate": { + "DeduplicatePos": 152, + "By": { + "ListPos": 167, + "ListEnd": 181, + "Items": [ + { + "Name": "colX", + "Unquoted": false, + "NamePos": 167, + "NameEnd": 171 + }, + { + "Name": "colY", + "Unquoted": false, + "NamePos": 172, + "NameEnd": 176 + }, + { + "Name": "colZ", + "Unquoted": false, + "NamePos": 177, + "NameEnd": 181 + } + ] + }, + "Except": null + } + }, + { + "OptimizePos": 183, + "StatementEnd": 219, + "Table": { + "Database": null, + "Table": { + "Name": "table", + "Unquoted": false, + "NamePos": 198, + "NameEnd": 203 + } + }, + "OnCluster": null, + "Partition": null, + "HasFinal": false, + "Deduplicate": { + "DeduplicatePos": 204, + "By": { + "ListPos": 219, + "ListEnd": 219, + "Items": [ + { + "Name": "*", + "Unquoted": false, + "NamePos": 219, + "NameEnd": 219 + } + ] + }, + "Except": { + "ListPos": 228, + "ListEnd": 232, + "Items": [ + { + "Name": "colX", + "Unquoted": false, + "NamePos": 228, + "NameEnd": 232 + } + ] + } + } + }, + { + "OptimizePos": 234, + "StatementEnd": 270, + "Table": { + "Database": null, + "Table": { + "Name": "table", + "Unquoted": false, + "NamePos": 249, + "NameEnd": 254 + } + }, + "OnCluster": null, + "Partition": null, + "HasFinal": false, + "Deduplicate": { + "DeduplicatePos": 255, + "By": { + "ListPos": 270, + "ListEnd": 270, + "Items": [ + { + "Name": "*", + "Unquoted": false, + "NamePos": 270, + 
"NameEnd": 270 + } + ] + }, + "Except": { + "ListPos": 279, + "ListEnd": 290, + "Items": [ + { + "LeftParenPos": 279, + "RightParenPos": 290, + "Items": { + "ListPos": 280, + "ListEnd": 290, + "Items": [ + { + "Name": "colX", + "Unquoted": false, + "NamePos": 280, + "NameEnd": 284 + }, + { + "Name": "colY", + "Unquoted": false, + "NamePos": 286, + "NameEnd": 290 + } + ] + }, + "ColumnArgList": null + } + ] + } + } + }, + { + "OptimizePos": 293, + "StatementEnd": 362, + "Table": { + "Database": null, + "Table": { + "Name": "table", + "Unquoted": false, + "NamePos": 308, + "NameEnd": 313 + } + }, + "OnCluster": null, + "Partition": null, + "HasFinal": false, + "Deduplicate": { + "DeduplicatePos": 314, + "By": { + "ListPos": 329, + "ListEnd": 362, + "Items": [ + { + "Name": { + "Name": "COLUMNS", + "Unquoted": false, + "NamePos": 329, + "NameEnd": 336 + }, + "Params": { + "LeftParenPos": 336, + "RightParenPos": 362, + "Items": { + "ListPos": 338, + "ListEnd": 361, + "Items": [ + { + "LiteralPos": 338, + "LiteralEnd": 361, + "Literal": "column-matched-by-regex" + } + ] + }, + "ColumnArgList": null + } + } + ] + }, + "Except": null + } + }, + { + "OptimizePos": 365, + "StatementEnd": 434, + "Table": { + "Database": null, + "Table": { + "Name": "table", + "Unquoted": false, + "NamePos": 380, + "NameEnd": 385 + } + }, + "OnCluster": null, + "Partition": null, + "HasFinal": false, + "Deduplicate": { + "DeduplicatePos": 386, + "By": { + "ListPos": 401, + "ListEnd": 434, + "Items": [ + { + "Name": { + "Name": "COLUMNS", + "Unquoted": false, + "NamePos": 401, + "NameEnd": 408 + }, + "Params": { + "LeftParenPos": 408, + "RightParenPos": 434, + "Items": { + "ListPos": 410, + "ListEnd": 433, + "Items": [ + { + "LiteralPos": 410, + "LiteralEnd": 433, + "Literal": "column-matched-by-regex" + } + ] + }, + "ColumnArgList": null + } + } + ] + }, + "Except": { + "ListPos": 443, + "ListEnd": 447, + "Items": [ + { + "Name": "colX", + "Unquoted": false, + "NamePos": 443, + "NameEnd": 447 
+ } + ] + } + } + }, + { + "OptimizePos": 449, + "StatementEnd": 518, + "Table": { + "Database": null, + "Table": { + "Name": "table", + "Unquoted": false, + "NamePos": 464, + "NameEnd": 469 + } + }, + "OnCluster": null, + "Partition": null, + "HasFinal": false, + "Deduplicate": { + "DeduplicatePos": 470, + "By": { + "ListPos": 485, + "ListEnd": 518, + "Items": [ + { + "Name": { + "Name": "COLUMNS", + "Unquoted": false, + "NamePos": 485, + "NameEnd": 492 + }, + "Params": { + "LeftParenPos": 492, + "RightParenPos": 518, + "Items": { + "ListPos": 494, + "ListEnd": 517, + "Items": [ + { + "LiteralPos": 494, + "LiteralEnd": 517, + "Literal": "column-matched-by-regex" + } + ] + }, + "ColumnArgList": null + } + } + ] + }, + "Except": { + "ListPos": 527, + "ListEnd": 538, + "Items": [ + { + "LeftParenPos": 527, + "RightParenPos": 538, + "Items": { + "ListPos": 528, + "ListEnd": 538, + "Items": [ + { + "Name": "colX", + "Unquoted": false, + "NamePos": 528, + "NameEnd": 532 + }, + { + "Name": "colY", + "Unquoted": false, + "NamePos": 534, + "NameEnd": 538 + } + ] + }, + "ColumnArgList": null + } + ] + } + } + } +] \ No newline at end of file