Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an AST visitor #601

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ version = "0.27.0"
authors = ["Andy Grove <andygrove73@gmail.com>"]
homepage = "https://github.com/sqlparser-rs/sqlparser-rs"
documentation = "https://docs.rs/sqlparser/"
keywords = [ "ansi", "sql", "lexer", "parser" ]
keywords = ["ansi", "sql", "lexer", "parser"]
repository = "https://github.com/sqlparser-rs/sqlparser-rs"
license = "Apache-2.0"
include = [
Expand All @@ -32,6 +32,7 @@ serde = { version = "1.0", features = ["derive"], optional = true }
# of dev-dependencies because of
# https://github.com/rust-lang/cargo/issues/1596
serde_json = { version = "1.0", optional = true }
derive-visitor = { version = "0.3.0", optional = true }

[dev-dependencies]
simple_logger = "4.0"
Expand Down
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,25 @@ This outputs
AST: [Query(Query { ctes: [], body: Select(Select { distinct: false, projection: [UnnamedExpr(Identifier("a")), UnnamedExpr(Identifier("b")), UnnamedExpr(Value(Long(123))), UnnamedExpr(Function(Function { name: ObjectName(["myfunc"]), args: [Identifier("b")], over: None, distinct: false }))], from: [TableWithJoins { relation: Table { name: ObjectName(["table_1"]), alias: None, args: [], with_hints: [] }, joins: [] }], selection: Some(BinaryOp { left: BinaryOp { left: Identifier("a"), op: Gt, right: Identifier("b") }, op: And, right: BinaryOp { left: Identifier("b"), op: Lt, right: Value(Long(100)) } }), group_by: [], having: None }), order_by: [OrderByExpr { expr: Identifier("a"), asc: Some(false) }, OrderByExpr { expr: Identifier("b"), asc: None }], limit: None, offset: None, fetch: None })]
```

### Analyzing and transforming the AST

Once you have an abstract syntax tree, you can analyze and transform it
using the optional [`derive-visitor`](https://github.com/nikis05/derive-visitor) feature.

For instance, if you want to rename all identifiers in a query:

```rust
use sqlparser::{dialect::GenericDialect, parser::Parser, ast::Ident};
use derive_visitor::{visitor_enter_fn_mut, DriveMut};

let mut statements = Parser::parse_sql(&GenericDialect, "select xxx").unwrap();
statements[0].drive_mut(&mut visitor_enter_fn_mut(|ident: &mut Ident| {
ident.value = ident.value.replace("xxx", "yyy");
}));
assert_eq!(statements[0].to_string(), "SELECT yyy");
```


## Command line
To parse a file and dump the results as JSON:
```
Expand Down
76 changes: 48 additions & 28 deletions src/ast/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,17 @@ use core::fmt;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

#[cfg(feature = "derive-visitor")]
use derive_visitor::{Drive, DriveMut};

use crate::ast::ObjectName;

use super::value::escape_single_quote_string;

/// SQL data types
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum DataType {
/// Fixed-length character type e.g. CHARACTER(10)
Character(Option<CharacterLength>),
Expand All @@ -36,37 +40,37 @@ pub enum DataType {
/// Variable-length character type e.g. VARCHAR(10)
Varchar(Option<CharacterLength>),
/// Variable-length character type e.g. NVARCHAR(10)
Nvarchar(Option<u64>),
Nvarchar(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Uuid type
Uuid,
/// Large character object with optional length e.g. CHARACTER LARGE OBJECT, CHARACTER LARGE OBJECT(1000), [standard]
///
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type
CharacterLargeObject(Option<u64>),
CharacterLargeObject(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Large character object with optional length e.g. CHAR LARGE OBJECT, CHAR LARGE OBJECT(1000), [standard]
///
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type
CharLargeObject(Option<u64>),
CharLargeObject(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Large character object with optional length e.g. CLOB, CLOB(1000), [standard], [Oracle]
///
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-large-object-type
/// [Oracle]: https://docs.oracle.com/javadb/10.10.1.2/ref/rrefclob.html
Clob(Option<u64>),
Clob(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Fixed-length binary type with optional length e.g. [standard], [MS SQL Server]
///
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-string-type
/// [MS SQL Server]: https://learn.microsoft.com/pt-br/sql/t-sql/data-types/binary-and-varbinary-transact-sql?view=sql-server-ver16
Binary(Option<u64>),
Binary(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Variable-length binary type with optional length e.g. [standard], [MS SQL Server]
///
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-string-type
/// [MS SQL Server]: https://learn.microsoft.com/pt-br/sql/t-sql/data-types/binary-and-varbinary-transact-sql?view=sql-server-ver16
Varbinary(Option<u64>),
Varbinary(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Large binary object with optional length e.g. BLOB, BLOB(1000), [standard], [Oracle]
///
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-large-object-string-type
/// [Oracle]: https://docs.oracle.com/javadb/10.8.3.0/ref/rrefblob.html
Blob(Option<u64>),
Blob(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Numeric type with optional precision and scale e.g. NUMERIC(10,2), [standard][1]
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
Expand All @@ -80,35 +84,35 @@ pub enum DataType {
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
Dec(ExactNumberInfo),
/// Floating point with optional precision e.g. FLOAT(8)
Float(Option<u64>),
Float(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Tiny integer with optional display width e.g. TINYINT or TINYINT(3)
TinyInt(Option<u64>),
TinyInt(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Unsigned tiny integer with optional display width e.g. TINYINT UNSIGNED or TINYINT(3) UNSIGNED
UnsignedTinyInt(Option<u64>),
UnsignedTinyInt(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Small integer with optional display width e.g. SMALLINT or SMALLINT(5)
SmallInt(Option<u64>),
SmallInt(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Unsigned small integer with optional display width e.g. SMALLINT UNSIGNED or SMALLINT(5) UNSIGNED
UnsignedSmallInt(Option<u64>),
UnsignedSmallInt(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// MySQL medium integer ([1]) with optional display width e.g. MEDIUMINT or MEDIUMINT(5)
///
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/integer-types.html
MediumInt(Option<u64>),
MediumInt(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Unsigned medium integer ([1]) with optional display width e.g. MEDIUMINT UNSIGNED or MEDIUMINT(5) UNSIGNED
///
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/integer-types.html
UnsignedMediumInt(Option<u64>),
UnsignedMediumInt(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Integer with optional display width e.g. INT or INT(11)
Int(Option<u64>),
Int(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Integer with optional display width e.g. INTEGER or INTEGER(11)
Integer(Option<u64>),
Integer(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Unsigned integer with optional display width e.g. INT UNSIGNED or INT(11) UNSIGNED
UnsignedInt(Option<u64>),
UnsignedInt(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Unsigned integer with optional display width e.g. INTEGER UNSIGNED or INTEGER(11) UNSIGNED
UnsignedInteger(Option<u64>),
UnsignedInteger(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Big integer with optional display width e.g. BIGINT or BIGINT(20)
BigInt(Option<u64>),
BigInt(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Unsigned big integer with optional display width e.g. BIGINT UNSIGNED or BIGINT(20) UNSIGNED
UnsignedBigInt(Option<u64>),
UnsignedBigInt(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Floating point e.g. REAL
Real,
/// Double
Expand All @@ -125,15 +129,21 @@ pub enum DataType {
/// Time with optional time precision and time zone information e.g. [standard][1].
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
Time(Option<u64>, TimezoneInfo),
Time(
#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>,
#[cfg_attr(feature = "derive-visitor", drive(skip))] TimezoneInfo,
),
/// Datetime with optional time precision e.g. [MySQL][1].
///
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/datetime.html
Datetime(Option<u64>),
Datetime(#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>),
/// Timestamp with optional time precision and time zone information e.g. [standard][1].
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#datetime-type
Timestamp(Option<u64>, TimezoneInfo),
Timestamp(
#[cfg_attr(feature = "derive-visitor", drive(skip))] Option<u64>,
#[cfg_attr(feature = "derive-visitor", drive(skip))] TimezoneInfo,
),
/// Interval
Interval,
/// Regclass used in postgresql serial
Expand All @@ -145,13 +155,16 @@ pub enum DataType {
/// Bytea
Bytea,
/// Custom type such as enums
Custom(ObjectName, Vec<String>),
Custom(
ObjectName,
#[cfg_attr(feature = "derive-visitor", drive(skip))] Vec<String>,
),
/// Arrays
Array(Option<Box<DataType>>),
/// Enums
Enum(Vec<String>),
Enum(#[cfg_attr(feature = "derive-visitor", drive(skip))] Vec<String>),
/// Set
Set(Vec<String>),
Set(#[cfg_attr(feature = "derive-visitor", drive(skip))] Vec<String>),
}

impl fmt::Display for DataType {
Expand Down Expand Up @@ -384,13 +397,17 @@ impl fmt::Display for TimezoneInfo {
/// [standard]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#exact-numeric-type
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum ExactNumberInfo {
/// No additional information e.g. `DECIMAL`
None,
/// Only precision information e.g. `DECIMAL(10)`
Precision(u64),
Precision(#[cfg_attr(feature = "derive-visitor", drive(skip))] u64),
/// Precision and scale information e.g. `DECIMAL(10,2)`
PrecisionAndScale(u64, u64),
PrecisionAndScale(
#[cfg_attr(feature = "derive-visitor", drive(skip))] u64,
#[cfg_attr(feature = "derive-visitor", drive(skip))] u64,
),
}

impl fmt::Display for ExactNumberInfo {
Expand All @@ -414,8 +431,10 @@ impl fmt::Display for ExactNumberInfo {
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-length
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub struct CharacterLength {
/// Default (if VARYING) or maximum (if not VARYING) length
#[cfg_attr(feature = "derive-visitor", drive(skip))]
pub length: u64,
/// Optional unit. If not specified, ANSI SQL implicitly treats the length as being in CHARACTERS
pub unit: Option<CharLengthUnits>,
Expand All @@ -436,6 +455,7 @@ impl fmt::Display for CharacterLength {
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum CharLengthUnits {
/// CHARACTERS unit
Characters,
Expand Down
24 changes: 23 additions & 1 deletion src/ast/ddl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,36 @@ use core::fmt;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

#[cfg(feature = "derive-visitor")]
use derive_visitor::{Drive, DriveMut};

use crate::ast::value::escape_single_quote_string;
use crate::ast::{display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName};
use crate::tokenizer::Token;

/// An `ALTER TABLE` (`Statement::AlterTable`) operation
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum AlterTableOperation {
/// `ADD <table_constraint>`
AddConstraint(TableConstraint),
/// `ADD [ COLUMN ] <column_def>`
AddColumn { column_def: ColumnDef },
/// `DROP CONSTRAINT [ IF EXISTS ] <name>`
DropConstraint {
#[cfg_attr(feature = "derive-visitor", drive(skip))]
if_exists: bool,
name: Ident,
#[cfg_attr(feature = "derive-visitor", drive(skip))]
cascade: bool,
},
/// `DROP [ COLUMN ] [ IF EXISTS ] <column_name> [ CASCADE ]`
DropColumn {
column_name: Ident,
#[cfg_attr(feature = "derive-visitor", drive(skip))]
if_exists: bool,
#[cfg_attr(feature = "derive-visitor", drive(skip))]
cascade: bool,
},
/// `DROP PRIMARY KEY`
Expand All @@ -55,11 +63,13 @@ pub enum AlterTableOperation {
},
/// Add Partitions
AddPartitions {
#[cfg_attr(feature = "derive-visitor", drive(skip))]
if_not_exists: bool,
new_partitions: Vec<Expr>,
},
DropPartitions {
partitions: Vec<Expr>,
#[cfg_attr(feature = "derive-visitor", drive(skip))]
if_exists: bool,
},
/// `RENAME [ COLUMN ] <old_column_name> TO <new_column_name>`
Expand Down Expand Up @@ -183,6 +193,7 @@ impl fmt::Display for AlterTableOperation {
/// An `ALTER COLUMN` (`Statement::AlterTable`) operation
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum AlterColumnOperation {
/// `SET NOT NULL`
SetNotNull,
Expand Down Expand Up @@ -226,12 +237,14 @@ impl fmt::Display for AlterColumnOperation {
/// `ALTER TABLE ADD <constraint>` statement.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum TableConstraint {
/// `[ CONSTRAINT <name> ] { PRIMARY KEY | UNIQUE } (<columns>)`
Unique {
name: Option<Ident>,
columns: Vec<Ident>,
/// Whether this is a `PRIMARY KEY` or just a `UNIQUE` constraint
#[cfg_attr(feature = "derive-visitor", drive(skip))]
is_primary: bool,
},
/// A referential integrity constraint (`[ CONSTRAINT <name> ] FOREIGN KEY (<columns>)
Expand Down Expand Up @@ -260,6 +273,7 @@ pub enum TableConstraint {
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/create-table.html
Index {
/// Whether this index starts with KEY (true) or INDEX (false), to maintain the same syntax.
#[cfg_attr(feature = "derive-visitor", drive(skip))]
display_as_key: bool,
/// Index name.
name: Option<Ident>,
Expand All @@ -284,6 +298,7 @@ pub enum TableConstraint {
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/fulltext-natural-language.html
FulltextOrSpatial {
/// Whether this is a `FULLTEXT` (true) or `SPATIAL` (false) definition.
#[cfg_attr(feature = "derive-visitor", drive(skip))]
fulltext: bool,
/// Whether the type is followed by the keyword `KEY`, `INDEX`, or no keyword at all.
index_type_display: KeyOrIndexDisplay,
Expand Down Expand Up @@ -389,6 +404,7 @@ impl fmt::Display for TableConstraint {
/// [1]: https://dev.mysql.com/doc/refman/8.0/en/create-table.html
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum KeyOrIndexDisplay {
/// Nothing to display
None,
Expand Down Expand Up @@ -424,6 +440,7 @@ impl fmt::Display for KeyOrIndexDisplay {
/// [3]: https://www.postgresql.org/docs/14/sql-createindex.html
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum IndexType {
BTree,
Hash,
Expand All @@ -442,6 +459,7 @@ impl fmt::Display for IndexType {
/// SQL column definition
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub struct ColumnDef {
pub name: Ident,
pub data_type: DataType,
Expand Down Expand Up @@ -477,6 +495,7 @@ impl fmt::Display for ColumnDef {
/// "column options," and we allow any column option to be named.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub struct ColumnOptionDef {
pub name: Option<Ident>,
pub option: ColumnOption,
Expand All @@ -492,6 +511,7 @@ impl fmt::Display for ColumnOptionDef {
/// TABLE` statement.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum ColumnOption {
/// `NULL`
Null,
Expand All @@ -501,6 +521,7 @@ pub enum ColumnOption {
Default(Expr),
/// `{ PRIMARY KEY | UNIQUE }`
Unique {
#[cfg_attr(feature = "derive-visitor", drive(skip))]
is_primary: bool,
},
/// A referential integrity constraint (`[FOREIGN KEY REFERENCES
Expand All @@ -521,7 +542,7 @@ pub enum ColumnOption {
/// - ...
DialectSpecific(Vec<Token>),
CharacterSet(ObjectName),
Comment(String),
Comment(#[cfg_attr(feature = "derive-visitor", drive(skip))] String),
}

impl fmt::Display for ColumnOption {
Expand Down Expand Up @@ -579,6 +600,7 @@ fn display_constraint_name(name: &'_ Option<Ident>) -> impl fmt::Display + '_ {
/// Used in foreign key constraints in `ON UPDATE` and `ON DELETE` options.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "derive-visitor", derive(Drive, DriveMut))]
pub enum ReferentialAction {
Restrict,
Cascade,
Expand Down
Loading