NVIDIA · andygrove · Apr 8, 2022 · Apr 7, 2022
diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -1279,6 +1279,22 @@ def test_hash_groupby_approx_percentile_reduction(aqe_enabled):
         lambda spark: gen_df(spark, [('v', DoubleGen())], length=100),
         [0.05, 0.25, 0.5, 0.75, 0.95], conf, reduction = True)
 
+@incompat
+@pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn)
+def test_hash_groupby_approx_percentile_reduction_single_row(aqe_enabled):
+    conf = {'spark.sql.adaptive.enabled': aqe_enabled}  # parametrized: 'false' and 'true'
+    compare_percentile_approx(
+        lambda spark: gen_df(spark, [('v', DoubleGen())], length=1),  # one double column 'v', length=1
+        [0.05, 0.25, 0.5, 0.75, 0.95], conf, reduction = True)  # same argument list as the length=100 reduction test
+
+@incompat
+@pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn)
+def test_hash_groupby_approx_percentile_reduction_no_rows(aqe_enabled):
+    conf = {'spark.sql.adaptive.enabled': aqe_enabled}  # parametrized: 'false' and 'true'
+    compare_percentile_approx(
+        lambda spark: gen_df(spark, [('v', DoubleGen())], length=0),  # one double column 'v', length=0 (empty input)
+        [0.05, 0.25, 0.5, 0.75, 0.95], conf, reduction = True)  # same argument list as the length=100 reduction test
+
 @incompat
 @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn)
 def test_hash_groupby_approx_percentile_byte(aqe_enabled):

diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuApproximatePercentile.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuApproximatePercentile.scala
@@ -21,6 +21,7 @@ import ai.rapids.cudf.{DType, GroupByAggregation, ReductionAggregation}
 import com.nvidia.spark.rapids.GpuCast.doCast
 import com.nvidia.spark.rapids.shims.ShimExpression
 
+import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression}
 import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile
 import org.apache.spark.sql.catalyst.util.ArrayData
@@ -64,9 +65,12 @@ case class GpuApproximatePercentile (
   // Attributes of fields in the aggregation buffer.
   override def aggBufferAttributes: Seq[AttributeReference] = outputBuf :: Nil
 
-  // initialValues is only used in reduction and this is not currently supported
-  override lazy val initialValues: Seq[GpuExpression] = throw new UnsupportedOperationException(
-    "approx_percentile does not support reduction")
+  override lazy val initialValues: Seq[GpuLiteral] = Seq(GpuLiteral(
+    InternalRow(
+      ArrayData.toArrayData(Array.empty), // centroids (mean, weight): empty array, none yet
+      0d, // min: literal 0.0 paired with the empty centroid array
+      0d), // max: literal 0.0 paired with the empty centroid array
+    CudfTDigest.dataType)) // the literal row is typed as CudfTDigest.dataType
 
   // the update expression will create a t-digest (List[Struct[Double, Double])
   override lazy val updateAggregates: Seq[CudfAggregate] =