-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-3414][SQL] Replace LowerCaseSchema with Resolver #2382
Changes from 2 commits
5b93711
219805a
2de881e
d4320f1
c21171e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,4 +22,9 @@ package org.apache.spark.sql.catalyst | |
* Analysis consists of translating [[UnresolvedAttribute]]s and [[UnresolvedRelation]]s | ||
* into fully typed objects using information in a schema [[Catalog]]. | ||
*/ | ||
package object analysis | ||
package object analysis { | ||
type Resolver = (String, String) => Boolean | ||
|
||
val caseInsensitiveResolution = (a: String, b: String) => a.toLowerCase == b.toLowerCase | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe |
||
val caseSensitiveResolution = (a: String, b: String) => a == b | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -98,12 +98,12 @@ case class Star( | |
override def withNullability(newNullability: Boolean) = this | ||
override def withQualifiers(newQualifiers: Seq[String]) = this | ||
|
||
def expand(input: Seq[Attribute]): Seq[NamedExpression] = { | ||
def expand(input: Seq[Attribute], resolver: Resolver): Seq[NamedExpression] = { | ||
val expandedAttributes: Seq[Attribute] = table match { | ||
// If there is no table specified, use all input attributes. | ||
case None => input | ||
// If there is a table, pick out attributes that are part of this table. | ||
case Some(t) => input.filter(_.qualifiers contains t) | ||
case Some(t) => input.filter(_.qualifiers.filter(resolver(_,t)).nonEmpty) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: add a space after the `,` in `resolver(_,t)`. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
} | ||
val mappedAttributes = expandedAttributes.map(mapFunction).zip(input).map { | ||
case (n: NamedExpression, _) => n | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
|
||
package org.apache.spark.sql.catalyst.plans.logical | ||
|
||
import org.apache.spark.sql.catalyst.analysis.Resolver | ||
import org.apache.spark.sql.catalyst.errors.TreeNodeException | ||
import org.apache.spark.sql.catalyst.expressions._ | ||
import org.apache.spark.sql.catalyst.plans.QueryPlan | ||
|
@@ -75,19 +76,23 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] { | |
* nodes of this LogicalPlan. The attribute is expressed as | ||
* string in the following form: `[scope].AttributeName.[nested].[fields]...`. | ||
*/ | ||
def resolveChildren(name: String): Option[NamedExpression] = | ||
resolve(name, children.flatMap(_.output)) | ||
def resolveChildren(name: String, resolver: Resolver): Option[NamedExpression] = | ||
resolve(name, children.flatMap(_.output), resolver) | ||
|
||
/** | ||
* Optionally resolves the given string to a [[NamedExpression]] based on the output of this | ||
* LogicalPlan. The attribute is expressed as string in the following form: | ||
* `[scope].AttributeName.[nested].[fields]...`. | ||
*/ | ||
def resolve(name: String): Option[NamedExpression] = | ||
resolve(name, output) | ||
def resolve(name: String, resolver: Resolver): Option[NamedExpression] = | ||
resolve(name, output, resolver) | ||
|
||
/** Performs attribute resolution given a name and a sequence of possible attributes. */ | ||
protected def resolve(name: String, input: Seq[Attribute]): Option[NamedExpression] = { | ||
protected def resolve( | ||
name: String, | ||
input: Seq[Attribute], | ||
resolver: Resolver): Option[NamedExpression] = { | ||
|
||
val parts = name.split("\\.") | ||
// Collect all attributes that are output by this node's children where either the first part | ||
// matches the name or where the first part matches the scope and the second part matches the | ||
|
@@ -96,16 +101,27 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] { | |
val options = input.flatMap { option => | ||
// If the first part of the desired name matches a qualifier for this possible match, drop it. | ||
val remainingParts = | ||
if (option.qualifiers.contains(parts.head) && parts.size > 1) parts.drop(1) else parts | ||
if (option.name == remainingParts.head) (option, remainingParts.tail.toList) :: Nil else Nil | ||
if (option.qualifiers.filter(resolver(_, parts.head)).nonEmpty && parts.size > 1) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
parts.drop(1) | ||
} else { | ||
parts | ||
} | ||
|
||
if (resolver(option.name, remainingParts.head)) { | ||
(option, remainingParts.tail.toList) :: Nil | ||
} else { | ||
Nil | ||
} | ||
} | ||
|
||
options.distinct match { | ||
case Seq((a, Nil)) => Some(a) // One match, no nested fields, use it. | ||
// One match, but we also need to extract the requested nested field. | ||
case Seq((a, nestedFields)) => | ||
Some(Alias(nestedFields.foldLeft(a: Expression)(GetField), nestedFields.last)()) | ||
case Seq() => None // No matches. | ||
case Seq() => | ||
println(s"Could not find $name in ${input.mkString(", ")}") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use |
||
None // No matches. | ||
case ambiguousReferences => | ||
throw new TreeNodeException( | ||
this, s"Ambiguous references to $name: ${ambiguousReferences.mkString(",")}") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,13 +57,14 @@ class HiveResolutionSuite extends HiveComparisonTest { | |
.registerTempTable("caseSensitivityTest") | ||
|
||
sql("SELECT a, b, A, B, n.a, n.b, n.A, n.B FROM caseSensitivityTest") | ||
} | ||
|
||
println(sql("SELECT * FROM casesensitivitytest one JOIN casesensitivitytest two ON one.a = two.a").queryExecution) | ||
|
||
sql("SELECT * FROM casesensitivitytest one JOIN casesensitivitytest two ON one.a = two.a").collect() | ||
|
||
// TODO: sql("SELECT * FROM casesensitivitytest a JOIN casesensitivitytest b ON a.a = b.a") | ||
ignore("case insensitivity with scala reflection joins") { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this test case ignored? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I see, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I only changed it to an explicit ignore instead of being commented out. We need to decide whether this is allowed or not. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hive 0.12 actually supports this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, what is the exception that is thrown? Will #2209 fix this? Otherwise we should open a JIRA. |
||
// Test resolution with Scala Reflection | ||
TestHive.sparkContext.parallelize(Data(1, 2, Nested(1,2), Seq(Nested(1,2))) :: Nil) | ||
.registerTempTable("caseSensitivityTest") | ||
|
||
sql("SELECT * FROM casesensitivitytest a JOIN casesensitivitytest b ON a.a = b.a").collect() | ||
} | ||
|
||
test("nested repeated resolution") { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Resolver
is probably too general a name; can we use a more precise name for this? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this will actually end up providing more general resolution functionality in the long term. I've added some scala doc for clarity though.