From 5d9951e01a3462bc081c4b6816c5a8c1e89c6d0f Mon Sep 17 00:00:00 2001
From: jeanlyn <jeanlyn92@gmail.com>
Date: Mon, 5 Jan 2015 00:26:14 +0800
Subject: [PATCH 1/3] SPARK-5068: fix querying data when a partition path doesn't exist

---
 .../apache/spark/sql/hive/TableReader.scala   |  6 ++-
 .../spark/sql/hive/QueryPartitionSuite.scala  | 49 +++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index c368715f7c6f5..bce835d5ff17b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -141,7 +141,11 @@ class HadoopTableReader(
       partitionToDeserializer: Map[HivePartition,
       Class[_ <: Deserializer]],
       filterOpt: Option[PathFilter]): RDD[Row] = {
-    val hivePartitionRDDs = partitionToDeserializer.map { case (partition, partDeserializer) =>
+    val hivePartitionRDDs = partitionToDeserializer.filter{ case (partition, partDeserializer) =>
+      val partPath = HiveShim.getDataLocationPath(partition)
+      val fs = partPath.getFileSystem(sc.hiveconf)
+      fs.exists(partPath)
+    }.map { case (partition, partDeserializer) =>
       val partDesc = Utilities.getPartitionDesc(partition)
       val partPath = HiveShim.getDataLocationPath(partition)
       val inputPathStr = applyFilterIfNeeded(partPath, filterOpt)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala
new file mode 100644
index 0000000000000..576a9bfd61b6b
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala
@@ -0,0 +1,49 @@
+package org.apache.spark.sql.hive
+
+import java.io.File
+
+import com.google.common.io.Files
+import org.apache.spark.sql.{QueryTest, _}
+import org.apache.spark.sql.hive.test.TestHive
+/* Implicits */
+import org.apache.spark.sql.hive.test.TestHive._
+
+
+class QueryPartitionSuite extends QueryTest {
+
+  test("SPARK-5068: query data when path doesn't exists"){
+    val testData = TestHive.sparkContext.parallelize(
+      (1 to 10).map(i => TestData(i, i.toString)))
+    testData.registerTempTable("testData")
+
+    val tmpDir = Files.createTempDir()
+    //create the table for test
+    sql(s"CREATE TABLE table_with_partition(key int,value string) PARTITIONED by (ds string) location '${tmpDir.toURI.toString}' ")
+    sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='1') SELECT key,value FROM testData")
+    sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='2') SELECT key,value FROM testData")
+    sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='3') SELECT key,value FROM testData")
+    sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='4') SELECT key,value FROM testData")
+    //test for the exist path
+    checkAnswer(sql("select key,value from table_with_partition"),
+      testData.collect.toSeq ++ testData.collect.toSeq ++ testData.collect.toSeq ++ testData.collect.toSeq)
+
+    //delect the path of one partition
+    val folders = tmpDir.listFiles.filter(_.isDirectory).toList
+    def deleteAll(file:File){
+      if(file.isDirectory()){
+        for(f:File <-file.listFiles()){
+          deleteAll(f);
+        }
+      }
+      file.delete();
+    }
+    deleteAll(folders(0))
+
+    //test for the affter delete the path
+    checkAnswer(sql("select key,value from table_with_partition"),
+      testData.collect.toSeq ++ testData.collect.toSeq ++ testData.collect.toSeq)
+
+    sql("DROP TABLE table_with_partition")
+    sql("DROP TABLE createAndInsertTest")
+  }
+}

From 72783003d9986ac2ddc927115b86ec00c371cdb7 Mon Sep 17 00:00:00 2001
From: jeanlyn <jeanlyn92@gmail.com>
Date: Wed, 28 Jan 2015 11:29:21 +0800
Subject: [PATCH 2/3] add the license header; compare against toSchemaRDD rows in QueryPartitionSuite

---
 .../spark/sql/hive/QueryPartitionSuite.scala  | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala
index 576a9bfd61b6b..0562127109ff9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.spark.sql.hive
 
 import java.io.File
@@ -25,7 +42,8 @@ class QueryPartitionSuite extends QueryTest {
     sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='4') SELECT key,value FROM testData")
     //test for the exist path
     checkAnswer(sql("select key,value from table_with_partition"),
-      testData.collect.toSeq ++ testData.collect.toSeq ++ testData.collect.toSeq ++ testData.collect.toSeq)
+      testData.toSchemaRDD.collect ++ testData.toSchemaRDD.collect
+        ++ testData.toSchemaRDD.collect ++ testData.toSchemaRDD.collect)
 
     //delect the path of one partition
     val folders = tmpDir.listFiles.filter(_.isDirectory).toList
@@ -41,7 +59,8 @@ class QueryPartitionSuite extends QueryTest {
 
     //test for the affter delete the path
     checkAnswer(sql("select key,value from table_with_partition"),
-      testData.collect.toSeq ++ testData.collect.toSeq ++ testData.collect.toSeq)
+      testData.toSchemaRDD.collect ++ testData.toSchemaRDD.collect
+        ++ testData.toSchemaRDD.collect)
 
     sql("DROP TABLE table_with_partition")
     sql("DROP TABLE createAndInsertTest")

From 40d1c941fcb0bd9ea64877c1d7046cd9509aa159 Mon Sep 17 00:00:00 2001
From: jeanlyn <jeanlyn92@gmail.com>
Date: Mon, 2 Feb 2015 19:49:46 +0800
Subject: [PATCH 3/3] fix code style

---
 .../src/main/scala/org/apache/spark/sql/hive/TableReader.scala  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
index bce835d5ff17b..37fca9c153141 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala
@@ -141,7 +141,7 @@ class HadoopTableReader(
       partitionToDeserializer: Map[HivePartition,
       Class[_ <: Deserializer]],
       filterOpt: Option[PathFilter]): RDD[Row] = {
-    val hivePartitionRDDs = partitionToDeserializer.filter{ case (partition, partDeserializer) =>
+    val hivePartitionRDDs = partitionToDeserializer.filter { case (partition, partDeserializer) =>
       val partPath = HiveShim.getDataLocationPath(partition)
       val fs = partPath.getFileSystem(sc.hiveconf)
       fs.exists(partPath)