From 4c4dc19e29e419400c8894e9b3b1608038b15c18 Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Fri, 8 Aug 2014 13:23:51 -0700
Subject: [PATCH] Fix bug with tree splicing.

---
 .../apache/spark/sql/hive/HiveStrategies.scala     | 13 +++++++++++--
 .../sql/parquet/ParquetMetastoreSuite.scala         | 17 +++++++++++++++++
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
index f2be1eae410ef..d644061215e0c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala
@@ -45,6 +45,9 @@ private[hive] trait HiveStrategies {
    *
    * TODO: Much of this logic is duplicated in HiveTableScan. Ideally we would do some refactoring
    * but since this is after the code freeze for 1.1 all logic is here to minimize disruption.
+   *
+   * Other issues:
+   *  - Much of this logic assumes case insensitive resolution.
    */
   @Experimental
   object ParquetConversion extends Strategy {
@@ -60,8 +63,14 @@ private[hive] trait HiveStrategies {
       })
     }
 
-    implicit class PhysicalPlanHacks(s: SparkPlan) {
-      def fakeOutput(newOutput: Seq[Attribute]) = OutputFaker(newOutput, s)
+    implicit class PhysicalPlanHacks(originalPlan: SparkPlan) {
+      def fakeOutput(newOutput: Seq[Attribute]) =
+        OutputFaker(
+          originalPlan.output.map(a =>
+            newOutput.find(a.name.toLowerCase == _.name.toLowerCase)
+              .getOrElse(
+                sys.error(s"Can't find attribute $a to fake in set ${newOutput.mkString(",")}"))),
+          originalPlan)
     }
 
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
index 9368536a11e5b..0723be7298e15 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/parquet/ParquetMetastoreSuite.scala
@@ -103,6 +103,23 @@ class ParquetMetastoreSuite extends QueryTest with BeforeAndAfterAll {
     )
   }
 
+  test("project partitioning and non-partitioning columns") {
+    checkAnswer(
+      sql("SELECT stringField, p, count(intField) " +
+        "FROM partitioned_parquet GROUP BY p, stringField"),
+      ("part-1", 1, 10) ::
+        ("part-2", 2, 10) ::
+        ("part-3", 3, 10) ::
+        ("part-4", 4, 10) ::
+        ("part-5", 5, 10) ::
+        ("part-6", 6, 10) ::
+        ("part-7", 7, 10) ::
+        ("part-8", 8, 10) ::
+        ("part-9", 9, 10) ::
+        ("part-10", 10, 10) :: Nil
+    )
+  }
+
   test("simple count") {
     checkAnswer(
       sql("SELECT COUNT(*) FROM partitioned_parquet"),
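
Note (not part of the patch): below is a minimal, standalone sketch of the case-insensitive
attribute matching that the revised fakeOutput performs when splicing an already-planned
Parquet scan back under attributes resolved with different casing. The Attr case class and
matchByNameIgnoreCase helper are illustrative stand-ins, not Spark's Attribute or OutputFaker APIs.

object FakeOutputSketch extends App {
  // Illustrative stand-in for Catalyst's Attribute; only the name matters for this sketch.
  case class Attr(name: String)

  // For each attribute of the spliced plan's output, pick the attribute from the desired
  // output whose name matches case-insensitively, or fail loudly. This mirrors the lookup
  // the patched fakeOutput does before wrapping the plan in OutputFaker.
  def matchByNameIgnoreCase(planOutput: Seq[Attr], desiredOutput: Seq[Attr]): Seq[Attr] =
    planOutput.map { a =>
      desiredOutput.find(_.name.equalsIgnoreCase(a.name))
        .getOrElse(sys.error(s"Can't find attribute $a to fake in set ${desiredOutput.mkString(",")}"))
    }

  // Hive's metastore lower-cases column names, while the analyzed query keeps the query's casing.
  val splicedOutput = Seq(Attr("stringfield"), Attr("p"))
  val desiredOutput = Seq(Attr("stringField"), Attr("p"))
  println(matchByNameIgnoreCase(splicedOutput, desiredOutput)) // List(Attr(stringField), Attr(p))
}

Passing the matched attributes, rather than newOutput directly as the old one-liner did, keeps the
faked output aligned with the spliced plan's own column order and names, which appears to be the
mismatch the first hunk guards against and the new test exercises.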