[SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.

Currently, in the grammar file, the rule `query` is responsible for parsing both SELECT and INSERT statements. As a result, we need extra semantic checks in the code to guard against invalid insert constructs in a query; a couple of examples are the checks in the `visitCreateView` and `visitAlterView` functions. Another issue is that we don't catch these invalid insert constructs everywhere until checkAnalysis, and the errors we raise there can be confusing. Here are a couple of examples:

```SQL
select * from (insert into bar values (2));
```
```
Error in query: unresolved operator 'Project [*];
'Project [*]
+- SubqueryAlias `__auto_generated_subquery_name`
   +- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1]
      +- Project [cast(col1#18 as int) AS c1#20]
         +- LocalRelation [col1#18]
```

```SQL
select * from foo where c1 in (insert into bar values (2))
```
```
Error in query: cannot resolve '(default.foo.`c1` IN (listquery()))' due to data type mismatch:
The number of columns in the left hand side of an IN subquery does not match the
number of columns in the output of subquery.

Left side columns:
[default.foo.`c1`].
Right side columns:
[].;;
'Project [*]
+- 'Filter c1#6 IN (list#5 [])
   :  +- InsertIntoHiveTable `default`.`bar`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, false, false, [c1]
   :     +- Project [cast(col1#7 as int) AS c1#9]
   :        +- LocalRelation [col1#7]
   +- SubqueryAlias `default`.`foo`
      +- HiveTableRelation `default`.`foo`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [c1#6]
```

For both cases above, we should reject the syntax at the parser level.

In this PR, we create two top-level parser rules to parse `SELECT` and `INSERT` statements, respectively.
I will create a small follow-up PR to allow CTEs in DESCRIBE QUERY after this PR is in.
Added tests to PlanParserSuite and removed the semantic-check tests from SparkSqlParserSuite.
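
For illustration only (not part of this patch), here is a minimal sketch of how the rejection now surfaces at parse time. It uses `CatalystSqlParser.parsePlan`, the same entry point the added `PlanParserSuite` tests exercise; the object name is illustrative and the exact `ParseException` wording may differ, but the point is that the invalid construct no longer reaches checkAnalysis:

```scala
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}

object ParserRejectionSketch {
  def main(args: Array[String]): Unit = {
    // With INSERT split out of the top-level `query` rule, an INSERT can no longer
    // appear where a query expression is expected, so this fails while parsing
    // instead of producing a confusing error later in checkAnalysis.
    try {
      CatalystSqlParser.parsePlan("SELECT * FROM (INSERT INTO bar VALUES (2))")
    } catch {
      case e: ParseException =>
        // Expected to contain something like: mismatched input 'FROM' expecting ...
        println(e.getMessage)
    }
  }
}
```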

Closes apache#24150 from dilipbiswal/split-query-insert.

Authored-by: Dilip Biswal <dbiswal@us.ibm.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
dilipbiswal authored and mccheah committed May 15, 2019
1 parent 8f9c5ac commit c46db75
Showing 5 changed files with 157 additions and 66 deletions.
@@ -76,6 +76,8 @@ singleTableSchema

statement
: query #statementDefault
| insertStatement #insertStatementDefault
| multiSelectStatement #multiSelectStatementDefault
| USE db=identifier #use
| CREATE DATABASE (IF NOT EXISTS)? identifier
(COMMENT comment=STRING)? locationSpec?
@@ -343,9 +345,14 @@ resource
: identifier STRING
;

insertStatement
: (ctes)? insertInto queryTerm queryOrganization #singleInsertQuery
| (ctes)? fromClause multiInsertQueryBody+ #multiInsertQuery
;

queryNoWith
: insertInto? queryTerm queryOrganization #singleInsertQuery
| fromClause multiInsertQueryBody+ #multiInsertQuery
: queryTerm queryOrganization #noWithQuery
| fromClause selectStatement #queryWithFrom
;

queryOrganization
@@ -358,9 +365,15 @@ queryOrganization
;

multiInsertQueryBody
: insertInto?
querySpecification
queryOrganization
: insertInto selectStatement
;

multiSelectStatement
: (ctes)? fromClause selectStatement+ #multiSelect
;

selectStatement
: querySpecification queryOrganization
;

queryTerm

@@ -111,15 +111,34 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
val query = plan(ctx.queryNoWith)

// Apply CTEs
query.optional(ctx.ctes) {
val ctes = ctx.ctes.namedQuery.asScala.map { nCtx =>
val namedQuery = visitNamedQuery(nCtx)
(namedQuery.alias, namedQuery)
}
// Check for duplicate names.
checkDuplicateKeys(ctes, ctx)
With(query, ctes)
query.optionalMap(ctx.ctes)(withCTE)
}

private def withCTE(ctx: CtesContext, plan: LogicalPlan): LogicalPlan = {
val ctes = ctx.namedQuery.asScala.map { nCtx =>
val namedQuery = visitNamedQuery(nCtx)
(namedQuery.alias, namedQuery)
}
// Check for duplicate names.
checkDuplicateKeys(ctes, ctx)
With(plan, ctes)
}

override def visitQueryToDesc(ctx: QueryToDescContext): LogicalPlan = withOrigin(ctx) {
plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
}

override def visitQueryWithFrom(ctx: QueryWithFromContext): LogicalPlan = withOrigin(ctx) {
val from = visitFromClause(ctx.fromClause)
validate(ctx.selectStatement.querySpecification.fromClause == null,
"Individual select statement can not have FROM cause as its already specified in the" +
" outer query block", ctx)
withQuerySpecification(ctx.selectStatement.querySpecification, from).
optionalMap(ctx.selectStatement.queryOrganization)(withQueryResultClauses)
}

override def visitNoWithQuery(ctx: NoWithQueryContext): LogicalPlan = withOrigin(ctx) {
plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses)
}

/**
@@ -151,36 +170,60 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging
val from = visitFromClause(ctx.fromClause)

// Build the insert clauses.
val inserts = ctx.multiInsertQueryBody.asScala.map {
val inserts = ctx.multiInsertQueryBody().asScala.map {
body =>
validate(body.querySpecification.fromClause == null,
validate(body.selectStatement.querySpecification.fromClause == null,
"Multi-Insert queries cannot have a FROM clause in their individual SELECT statements",
body)

withInsertInto(body.insertInto,
withQuerySpecification(body.selectStatement.querySpecification, from).
// Add organization statements.
optionalMap(body.selectStatement.queryOrganization)(withQueryResultClauses))
}

// If there are multiple INSERTS just UNION them together into one query.
val insertPlan = inserts match {
case Seq(query) => query
case queries => Union(queries)
}
// Apply CTEs
insertPlan.optionalMap(ctx.ctes)(withCTE)
}

override def visitMultiSelect(ctx: MultiSelectContext): LogicalPlan = withOrigin(ctx) {
val from = visitFromClause(ctx.fromClause)

// Build the select clauses.
val selects = ctx.selectStatement.asScala.map {
body =>
validate(body.querySpecification.fromClause == null,
"Multi-select queries cannot have a FROM clause in their individual SELECT statements",
body)

withQuerySpecification(body.querySpecification, from).
// Add organization statements.
optionalMap(body.queryOrganization)(withQueryResultClauses).
// Add insert.
optionalMap(body.insertInto())(withInsertInto)
optionalMap(body.queryOrganization)(withQueryResultClauses)
}

// If there are multiple INSERTS just UNION them together into one query.
inserts match {
val selectUnionPlan = selects match {
case Seq(query) => query
case queries => Union(queries)
}
// Apply CTEs
selectUnionPlan.optionalMap(ctx.ctes)(withCTE)
}

/**
* Create a logical plan for a regular (single-insert) query.
*/
override def visitSingleInsertQuery(
ctx: SingleInsertQueryContext): LogicalPlan = withOrigin(ctx) {
plan(ctx.queryTerm).
// Add organization statements.
optionalMap(ctx.queryOrganization)(withQueryResultClauses).
// Add insert.
optionalMap(ctx.insertInto())(withInsertInto)
val insertPlan = withInsertInto(ctx.insertInto(),
plan(ctx.queryTerm).optionalMap(ctx.queryOrganization)(withQueryResultClauses))
// Apply CTEs
insertPlan.optionalMap(ctx.ctes)(withCTE)
}

/**

@@ -132,7 +132,11 @@ class PlanParserSuite extends AnalysisTest {
table("a").select(star()).union(table("a").where('s < 10).select(star())))
intercept(
"from a select * select * from x where a.s < 10",
"Multi-Insert queries cannot have a FROM clause in their individual SELECT statements")
"Multi-select queries cannot have a FROM clause in their individual SELECT statements")
intercept(
"from a select * from b",
"Individual select statement can not have FROM cause as its already specified in " +
"the outer query block")
assertEqual(
"from a insert into tbl1 select * insert into tbl2 select * where s < 10",
table("a").select(star()).insertInto("tbl1").union(
@@ -753,4 +757,47 @@ class PlanParserSuite extends AnalysisTest {
assertEqual(query2, Distinct(a.union(b)).except(c.intersect(d, isAll = true), isAll = true))
}
}

test("create/alter view as insert into table") {
val m1 = intercept[ParseException] {
parsePlan("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)")
}.getMessage
assert(m1.contains("mismatched input 'INSERT' expecting"))
// Multi insert query
val m2 = intercept[ParseException] {
parsePlan(
"""
|CREATE VIEW testView AS FROM jt
|INSERT INTO tbl1 SELECT * WHERE jt.id < 5
|INSERT INTO tbl2 SELECT * WHERE jt.id > 4
""".stripMargin)
}.getMessage
assert(m2.contains("mismatched input 'INSERT' expecting"))
val m3 = intercept[ParseException] {
parsePlan("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)")
}.getMessage
assert(m3.contains("mismatched input 'INSERT' expecting"))
// Multi insert query
val m4 = intercept[ParseException] {
parsePlan(
"""
|ALTER VIEW testView AS FROM jt
|INSERT INTO tbl1 SELECT * WHERE jt.id < 5
|INSERT INTO tbl2 SELECT * WHERE jt.id > 4
""".stripMargin
)
}.getMessage
assert(m4.contains("mismatched input 'INSERT' expecting"))
}

test("Invalid insert constructs in the query") {
val m1 = intercept[ParseException] {
parsePlan("SELECT * FROM (INSERT INTO BAR VALUES (2))")
}.getMessage
assert(m1.contains("mismatched input 'FROM' expecting"))
val m2 = intercept[ParseException] {
parsePlan("SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))")
}.getMessage
assert(m2.contains("mismatched input 'FROM' expecting"))
}
}

@@ -1431,15 +1431,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
if (ctx.identifierList != null) {
operationNotAllowed("CREATE VIEW ... PARTITIONED ON", ctx)
} else {
// CREATE VIEW ... AS INSERT INTO is not allowed.
ctx.query.queryNoWith match {
case s: SingleInsertQueryContext if s.insertInto != null =>
operationNotAllowed("CREATE VIEW ... AS INSERT INTO", ctx)
case _: MultiInsertQueryContext =>
operationNotAllowed("CREATE VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
case _ => // OK
}

val userSpecifiedColumns = Option(ctx.identifierCommentList).toSeq.flatMap { icl =>
icl.identifierComment.asScala.map { ic =>
ic.identifier.getText -> Option(ic.STRING).map(string)
@@ -1476,14 +1467,6 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) {
* }}}
*/
override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) {
// ALTER VIEW ... AS INSERT INTO is not allowed.
ctx.query.queryNoWith match {
case s: SingleInsertQueryContext if s.insertInto != null =>
operationNotAllowed("ALTER VIEW ... AS INSERT INTO", ctx)
case _: MultiInsertQueryContext =>
operationNotAllowed("ALTER VIEW ... AS FROM ... [INSERT INTO ...]+", ctx)
case _ => // OK
}
AlterViewAsCommand(
name = visitTableIdentifier(ctx.tableIdentifier),
originalText = source(ctx.query),

@@ -215,30 +215,6 @@ class SparkSqlParserSuite extends AnalysisTest {
"no viable alternative at input")
}

test("create table using - schema") {
assertEqual("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) USING parquet",
createTableUsing(
table = "my_tab",
schema = (new StructType)
.add("a", IntegerType, nullable = true, "test")
.add("b", StringType)
)
)
intercept("CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING) USING parquet",
"no viable alternative at input")
}

test("create view as insert into table") {
// Single insert query
intercept("CREATE VIEW testView AS INSERT INTO jt VALUES(1, 1)",
"Operation not allowed: CREATE VIEW ... AS INSERT INTO")

// Multi insert query
intercept("CREATE VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
"INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
"Operation not allowed: CREATE VIEW ... AS FROM ... [INSERT INTO ...]+")
}

test("SPARK-17328 Fix NPE with EXPLAIN DESCRIBE TABLE") {
assertEqual("describe table t",
DescribeTableCommand(
@@ -377,6 +353,18 @@ class SparkSqlParserSuite extends AnalysisTest {
Project(UnresolvedAlias(concat) :: Nil, UnresolvedRelation(TableIdentifier("t"))))
}

<<<<<<< HEAD
test("SPARK-25046 Fix Alter View ... As Insert Into Table") {
// Single insert query
intercept("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)",
"Operation not allowed: ALTER VIEW ... AS INSERT INTO")

// Multi insert query
intercept("ALTER VIEW testView AS FROM jt INSERT INTO tbl1 SELECT * WHERE jt.id < 5 " +
"INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
"Operation not allowed: ALTER VIEW ... AS FROM ... [INSERT INTO ...]+")
}
||||||| parent of 9cc925cda2... [SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.
test("SPARK-25046 Fix Alter View ... As Insert Into Table") {
// Single insert query
intercept("ALTER VIEW testView AS INSERT INTO jt VALUES(1, 1)",
@@ -387,4 +375,21 @@ class SparkSqlParserSuite extends AnalysisTest {
"INSERT INTO tbl2 SELECT * WHERE jt.id > 4",
"Operation not allowed: ALTER VIEW ... AS FROM ... [INSERT INTO ...]+")
}

test("database and schema tokens are interchangeable") {
assertEqual("CREATE DATABASE foo", parser.parsePlan("CREATE SCHEMA foo"))
assertEqual("DROP DATABASE foo", parser.parsePlan("DROP SCHEMA foo"))
assertEqual("ALTER DATABASE foo SET DBPROPERTIES ('x' = 'y')",
parser.parsePlan("ALTER SCHEMA foo SET DBPROPERTIES ('x' = 'y')"))
assertEqual("DESC DATABASE foo", parser.parsePlan("DESC SCHEMA foo"))
}
=======
test("database and schema tokens are interchangeable") {
assertEqual("CREATE DATABASE foo", parser.parsePlan("CREATE SCHEMA foo"))
assertEqual("DROP DATABASE foo", parser.parsePlan("DROP SCHEMA foo"))
assertEqual("ALTER DATABASE foo SET DBPROPERTIES ('x' = 'y')",
parser.parsePlan("ALTER SCHEMA foo SET DBPROPERTIES ('x' = 'y')"))
assertEqual("DESC DATABASE foo", parser.parsePlan("DESC SCHEMA foo"))
}
>>>>>>> 9cc925cda2... [SPARK-27209][SQL] Split parsing of SELECT and INSERT into two top-level rules in the grammar file.
}
