From f85e21d052b071377d11696f565153358e33a112 Mon Sep 17 00:00:00 2001
From: Wei He <weihe@meta.com>
Date: Mon, 8 Jul 2024 20:21:04 -0700
Subject: [PATCH] Extend ApproxPercentileResultVerifier for window fuzzer
 (#10367)

Summary: Pull Request resolved: https://github.com/facebookincubator/velox/pull/10367

Reviewed By: kevinwilfong

Differential Revision: D59257657
---
 velox/exec/fuzzer/FuzzerUtil.h                |  10 +
 velox/exec/fuzzer/ResultVerifier.h            |   2 +
 velox/exec/fuzzer/WindowFuzzer.cpp            |  19 +-
 velox/exec/fuzzer/WindowFuzzer.h              |   9 +-
 .../fuzzer/ApproxDistinctResultVerifier.h     |   1 +
 .../fuzzer/ApproxPercentileResultVerifier.h   | 479 +++++++++++++++++-
 .../prestosql/fuzzer/WindowFuzzerTest.cpp     |   5 +-
 7 files changed, 492 insertions(+), 33 deletions(-)
diff --git a/velox/exec/fuzzer/FuzzerUtil.h b/velox/exec/fuzzer/FuzzerUtil.h
index 81efca5f9023..09c00c66a033 100644
--- a/velox/exec/fuzzer/FuzzerUtil.h
+++ b/velox/exec/fuzzer/FuzzerUtil.h
@@ -13,12 +13,22 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#pragma once
 
+#include "velox/core/PlanNode.h"
 #include "velox/exec/Split.h"
 
 namespace facebook::velox::exec::test {
 const std::string kHiveConnectorId = "test-hive";
 
+/// Pairs the name of a sorting key with the sort order to apply to it.
+struct SortingKeyAndOrder {
+  const std::string key_;
+  const core::SortOrder sortOrder_;
+  SortingKeyAndOrder(std::string key, core::SortOrder sortOrder)
+      : key_(std::move(key)), sortOrder_(std::move(sortOrder)) {}
+};
+
 /// Write the vector to the path.
 void writeToFile(
     const std::string& path,
diff --git a/velox/exec/fuzzer/ResultVerifier.h b/velox/exec/fuzzer/ResultVerifier.h
index 36cf9c171ac2..bda5ccecc7f1 100644
--- a/velox/exec/fuzzer/ResultVerifier.h
+++ b/velox/exec/fuzzer/ResultVerifier.h
@@ -18,6 +18,7 @@
 #include <string>
 
 #include "velox/core/PlanNode.h"
+#include "velox/exec/fuzzer/FuzzerUtil.h"
 #include "velox/vector/ComplexVector.h"
 
 namespace facebook::velox::exec::test {
@@ -61,6 +62,7 @@ class ResultVerifier {
   virtual void initializeWindow(
       const std::vector<RowVectorPtr>& /*input*/,
       const std::vector<std::string>& /*partitionByKeys*/,
+      const std::vector<SortingKeyAndOrder>& /*sortingKeysAndOrders*/,
       const core::WindowNode::Function& /*function*/,
       const std::string& /*frame*/,
       const std::string& /*windowName*/) {
diff --git a/velox/exec/fuzzer/WindowFuzzer.cpp b/velox/exec/fuzzer/WindowFuzzer.cpp
index 8f9795534587..734e87a702ac 100644
--- a/velox/exec/fuzzer/WindowFuzzer.cpp
+++ b/velox/exec/fuzzer/WindowFuzzer.cpp
@@ -177,8 +177,7 @@ std::string WindowFuzzer::getFrame(
   return frame.str();
 }
 
-std::vector<WindowFuzzer::SortingKeyAndOrder>
-WindowFuzzer::generateSortingKeysAndOrders(
+std::vector<SortingKeyAndOrder> WindowFuzzer::generateSortingKeysAndOrders(
     const std::string& prefix,
     std::vector<std::string>& names,
     std::vector<TypePtr>& types) {
@@ -359,11 +358,17 @@ void initializeVerifier(
     const std::shared_ptr<ResultVerifier>& customVerifier,
     const std::vector<RowVectorPtr>& input,
     const std::vector<std::string>& partitionKeys,
+    const std::vector<SortingKeyAndOrder>& sortingKeysAndOrders,
     const std::string& frame) {
   const auto& windowNode =
       std::dynamic_pointer_cast<const core::WindowNode>(plan);
   customVerifier->initializeWindow(
-      input, partitionKeys, windowNode->windowFunctions()[0], frame, "w0");
+      input,
+      partitionKeys,
+      sortingKeysAndOrders,
+      windowNode->windowFunctions()[0],
+      frame,
+      "w0");
 }
 } // namespace
 
@@ -424,7 +429,13 @@ bool WindowFuzzer::verifyWindow(
         VELOX_CHECK(
             customVerifier->supportsVerify(),
             "Window fuzzer only uses custom verify() methods.");
-        initializeVerifier(plan, customVerifier, input, partitionKeys, frame);
+        initializeVerifier(
+            plan,
+            customVerifier,
+            input,
+            partitionKeys,
+            sortingKeysAndOrders,
+            frame);
         customVerifier->verify(resultOrError.result);
       }
     }
diff --git a/velox/exec/fuzzer/WindowFuzzer.h b/velox/exec/fuzzer/WindowFuzzer.h
index bf36de8a9e3d..f53e26098fb4 100644
--- a/velox/exec/fuzzer/WindowFuzzer.h
+++ b/velox/exec/fuzzer/WindowFuzzer.h
@@ -18,6 +18,7 @@
 #include "velox/exec/Aggregate.h"
 #include "velox/exec/WindowFunction.h"
 #include "velox/exec/fuzzer/AggregationFuzzerBase.h"
+#include "velox/exec/fuzzer/FuzzerUtil.h"
 #include "velox/exec/fuzzer/PrestoQueryRunner.h"
 #include "velox/exec/fuzzer/ReferenceQueryRunner.h"
 #include "velox/vector/fuzzer/VectorFuzzer.h"
@@ -76,14 +77,6 @@ class WindowFuzzer : public AggregationFuzzerBase {
   void go(const std::string& planPath);
 
  private:
-  struct SortingKeyAndOrder {
-    const std::string key_;
-    const core::SortOrder sortOrder_;
-
-    SortingKeyAndOrder(std::string key, core::SortOrder sortOrder)
-        : key_(std::move(key)), sortOrder_(std::move(sortOrder)) {}
-  };
-
   void addWindowFunctionSignatures(const WindowFunctionMap& signatureMap);
 
   // Return a randomly generated frame clause string together with a boolean
diff --git a/velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h b/velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h
index 95295010aa19..32c48d84d226 100644
--- a/velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h
+++ b/velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h
@@ -64,6 +64,7 @@ class ApproxDistinctResultVerifier : public ResultVerifier {
   void initializeWindow(
       const std::vector<RowVectorPtr>& input,
       const std::vector<std::string>& partitionByKeys,
+      const std::vector<SortingKeyAndOrder>& /*sortingKeysAndOrders*/,
       const core::WindowNode::Function& function,
       const std::string& frame,
       const std::string& windowName) override {
diff --git a/velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h b/velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h
index 6059f112e312..0ef745d8d50b 100644
--- a/velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h
+++ b/velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h
@@ -45,26 +45,19 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
       const core::AggregationNode::Aggregate& aggregate,
       const std::string& aggregateName) override {
     VELOX_CHECK(!input.empty());
+    verifyWindow_ = false;
 
     int64_t numInputs = 0;
     for (const auto& v : input) {
       numInputs += v->size();
     }
 
-    const auto& args = aggregate.call->inputs();
-    const auto& valueField = fieldName(args[0]);
-    std::optional<std::string> weightField;
-    if (args.size() >= 3 && args[1]->type()->isBigint()) {
-      weightField = fieldName(args[1]);
-    }
-
     groupingKeys_ = groupingKeys;
     name_ = aggregateName;
 
-    percentiles_ = extractPercentiles(input, aggregate);
-    VELOX_CHECK(!percentiles_.empty());
-
-    accuracy_ = extractAccuracy(aggregate, input[0]);
+    const auto& [valueField, weightField] =
+        extractValueAndWeight(aggregate.call);
+    extractPercentileAndAccuracy(aggregate.call, input);
 
     // Compute percentiles for all values.
     allRanges_ =
@@ -72,6 +65,34 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
     VELOX_CHECK_LE(allRanges_->size(), numInputs);
   }
 
+  // Prepares window verification: records the partition-by keys and window
+  // name, extracts percentile(s) and accuracy, and computes 'allRanges_'.
+  void initializeWindow(
+      const std::vector<RowVectorPtr>& input,
+      const std::vector<std::string>& partitionByKeys,
+      const std::vector<SortingKeyAndOrder>& sortingKeysAndOrders,
+      const core::WindowNode::Function& function,
+      const std::string& /*frame*/,
+      const std::string& windowName) override {
+    VELOX_CHECK(!input.empty());
+    verifyWindow_ = true;
+    groupingKeys_ = partitionByKeys;
+    name_ = windowName;
+    const auto& [valueField, weightField] =
+        extractValueAndWeight(function.functionCall);
+    bool isArrayPercentile =
+        extractPercentileAndAccuracy(function.functionCall, input);
+    // The frame is taken from 'function.frame'; the string 'frame' is unused.
+    allRanges_ = computePercentilesForWindow(
+        input,
+        valueField,
+        weightField,
+        sortingKeysAndOrders,
+        function.frame,
+        function.functionCall->type(),
+        isArrayPercentile);
+  }
+
   bool compare(
       const RowVectorPtr& /*result*/,
       const RowVectorPtr& /*altResult*/) override {
@@ -80,21 +101,27 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
 
   bool verify(const RowVectorPtr& result) override {
     // Compute acceptable ranges of percentiles for each value in 'result'.
-    auto ranges = getPercentileRanges(result);
-    // VELOX_CHECK_EQ(ranges->size(), result->size() * percentiles_.size());
+    RowVectorPtr ranges;
+    if (verifyWindow_) {
+      ranges = getPercentileRangesForWindow(result);
+    } else {
+      ranges = getPercentileRanges(result);
+    }
 
     auto& value = ranges->childAt(name_);
     auto* minPct = ranges->childAt("min_pct")->as<SimpleVector<double>>();
     auto* maxPct = ranges->childAt("max_pct")->as<SimpleVector<double>>();
     auto* pctIndex = ranges->childAt("pct_index")->as<SimpleVector<int64_t>>();
 
+    // Number of non-null rows in the actual result.
+    auto numNonNull = 0;
     for (auto i = 0; i < ranges->size(); ++i) {
       if (value->isNullAt(i)) {
         VELOX_CHECK(minPct->isNullAt(i));
         VELOX_CHECK(maxPct->isNullAt(i));
         continue;
       }
-
+      numNonNull++;
       VELOX_CHECK(!minPct->isNullAt(i));
       VELOX_CHECK(!maxPct->isNullAt(i));
       VELOX_CHECK(!pctIndex->isNullAt(i));
@@ -106,6 +133,13 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
         return false;
       }
     }
+    if (verifyWindow_ && numNonNull != allRanges_->size()) {
+      LOG(ERROR) << fmt::format(
+          "Expected result contains {} non-null rows while the actual result contains {}.",
+          allRanges_->size(),
+          numNonNull);
+      return false;
+    }
 
     return true;
   }
@@ -117,10 +151,36 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
  private:
   static constexpr double kDefaultAccuracy = 0.0133;
 
+  // Returns the name of the value column and, if the call has a weight
+  // argument (3+ inputs with a BIGINT second input), the weight column name.
+  std::pair<std::string, std::optional<std::string>> extractValueAndWeight(
+      const core::CallTypedExprPtr& functionCall) {
+    const auto& inputs = functionCall->inputs();
+    const auto& value = fieldName(inputs[0]);
+    std::optional<std::string> weight;
+    if (inputs.size() >= 3 && inputs[1]->type()->isBigint()) {
+      weight = fieldName(inputs[1]);
+    }
+    return {value, weight};
+  }
+
+  // Extracts the percentile(s) into percentiles_ and the accuracy into
+  // accuracy_. Returns true if the percentile argument is an array.
+  bool extractPercentileAndAccuracy(
+      const core::CallTypedExprPtr& functionCall,
+      const std::vector<RowVectorPtr>& input) {
+    bool isArrayPercentile = false; // Out-param, set by extractPercentiles().
+    percentiles_ = extractPercentiles(input, functionCall, isArrayPercentile);
+    VELOX_CHECK(!percentiles_.empty());
+
+    accuracy_ = extractAccuracy(functionCall, input[0]);
+    return isArrayPercentile;
+  }
+
   static double extractAccuracy(
-      const core::AggregationNode::Aggregate& aggregate,
+      const core::CallTypedExprPtr& functionCall,
       const RowVectorPtr& input) {
-    const auto& args = aggregate.call->inputs();
+    const auto& args = functionCall->inputs();
 
     column_index_t accuracyIndex = 2;
     if (args.size() >= 3 && args[1]->type()->isBigint()) {
@@ -243,6 +303,248 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
     return AssertQueryBuilder(plan).copyResults(input[0]->pool());
   }
 
+  // Renders 'frame' as "<window type> between <start bound> and <end bound>",
+  // where each bound is its optional value followed by the bound type name.
+  std::string getFrameClause(const core::WindowNode::Frame& frame) {
+    const auto bound = [](const auto& value, auto boundType) {
+      const std::string prefix = value ? value->toString() + " " : "";
+      return prefix + core::WindowNode::boundTypeName(boundType);
+    };
+    std::stringstream ss;
+    ss << core::WindowNode::windowTypeName(frame.type) << " between "
+       << bound(frame.startValue, frame.startType) << " and "
+       << bound(frame.endValue, frame.endType);
+    return ss.str();
+  }
+
+  // Builds "order by k1 o1, k2 o2, ..."; returns "" when there are no keys.
+  std::string getOrderByClause(
+      const std::vector<SortingKeyAndOrder>& sortingKeysAndOrders) {
+    if (sortingKeysAndOrders.empty()) {
+      return "";
+    }
+    std::stringstream ss;
+    ss << "order by ";
+    const char* separator = "";
+    for (const auto& keyAndOrder : sortingKeysAndOrders) {
+      ss << separator << keyAndOrder.key_ << " "
+         << keyAndOrder.sortOrder_.toString();
+      separator = ", ";
+    }
+    return ss.str();
+  }
+
+  // Builds "partition by k1, k2, ..."; returns "" when there are no keys.
+  std::string getPartitionByClause(
+      const std::vector<std::string>& partitionByKeys) {
+    return partitionByKeys.empty()
+        ? ""
+        : "partition by " + folly::join(", ", partitionByKeys);
+  }
+
+  // For each input row, calculates a map of {value : [order_min, order_max]} as
+  // 'expected' for every distinct value in the window frame of the current row,
+  // and the weighted total number of values in the frame as 'cnt'. 'order_min'
+  // is the rank right before the first appearance of 'value' when the values in
+  // the current frame are sorted in ascending order, and 'order_max' is the
+  // rank of the last appearance of 'value'. For example, for a table 't(c0, c1,
+  // c2, weight, row_num)' and a window operation 'approx_percentile(c0,
+  // percentile) over (partition by c1 order by c2 desc rows between 1 preceding
+  // and 1 following)', this method essentially returns the result of the
+  // following query:
+  //  SELECT
+  //      c1,
+  //      row_num,
+  //      NULL AS actual,
+  //      MAP_AGG(bucket_element, order_pair) AS expected,
+  //      ARBITRARY(weight_total) AS cnt
+  //  FROM (
+  //      SELECT
+  //          c1,
+  //          row_num,
+  //          bucket_element,
+  //          CAST(
+  //              ROW(COALESCE(order_min, 0), order_max) AS ROW(
+  //                  order_min BIGINT,
+  //                  order_max BIGINT
+  //              )
+  //          ) AS order_pair,
+  //          weight_total
+  //      FROM (
+  //          SELECT
+  //              c1,
+  //              row_num,
+  //              bucket_element,
+  //              weight,
+  //              SUM(weight) OVER (
+  //                  PARTITION BY
+  //                      c1,
+  //                      row_num
+  //                  ORDER BY
+  //                      bucket_element ASC
+  //                  ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+  //              ) AS weight_total,
+  //              SUM(weight) OVER (
+  //                  PARTITION BY
+  //                      c1,
+  //                      row_num
+  //                  ORDER BY
+  //                      bucket_element ASC
+  //                  ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
+  //              ) AS order_min,
+  //              SUM(weight) OVER (
+  //                  PARTITION BY
+  //                      c1,
+  //                      row_num
+  //                  ORDER BY
+  //                      bucket_element ASC
+  //                  ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
+  //              ) AS order_max
+  //          FROM (
+  //              SELECT
+  //                  c1,
+  //                  row_num,
+  //                  bucket_element,
+  //                  bucket_weight AS weight
+  //              FROM (
+  //                  SELECT
+  //                      c1,
+  //                      row_num,
+  //                      bucket_element,
+  //                      bucket_weight
+  //                  FROM (
+  //                      SELECT
+  //                          c1,
+  //                          row_num,
+  //                          TRANSFORM_VALUES(bucket, (k, v) -> ARRAY_SUM(v))
+  //                          AS bucket
+  //                      FROM (
+  //                          SELECT
+  //                              c1,
+  //                              row_num,
+  //                              MULTIMAP_AGG(c0, weight) OVER (
+  //                                  PARTITION BY
+  //                                      c1
+  //                                  ORDER BY
+  //                                      c2 DESC
+  //                                  ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
+  //                              ) AS bucket
+  //                          FROM (
+  //                              VALUES
+  //                                  (1, TRUE, 1, 2, 1),
+  //                                  (2, TRUE, 3, 1, 2),
+  //                                  (1, TRUE, 4, 1, 3),
+  //                                  (4, TRUE, 5, 1, 4),
+  //                                  (1, FALSE, 1, 2, 5),
+  //                                  (2, FALSE, 3, 1, 6),
+  //                                  (3, FALSE, 4, 1, 7),
+  //                                  (4, FALSE, 5, 1, 8)
+  //                          ) t(c0, c1, c2, weight, row_num)
+  //                      )
+  //                  ) bucketed(c1, row_num, bucket)
+  //                  CROSS JOIN UNNEST(bucket) AS tmp(bucket_element,
+  //                  bucket_weight)
+  //              )
+  //          )
+  //      )
+  //  )
+  //  GROUP BY
+  //      c1,
+  //      row_num
+  RowVectorPtr computePercentilesForWindow(
+      const std::vector<RowVectorPtr>& input,
+      const std::string& valueField,
+      const std::optional<std::string>& weightField,
+      const std::vector<SortingKeyAndOrder>& sortingKeysAndOrders,
+      const core::WindowNode::Frame& frame,
+      const TypePtr& resultType,
+      bool isArray) {
+    VELOX_CHECK(!input.empty());
+    const bool weighted = weightField.has_value();
+
+    // Keep partition keys, sorting keys, row_number, value (x) and weight (w).
+    std::vector<std::string> projections = groupingKeys_;
+    for (const auto& sortingKey : sortingKeysAndOrders) {
+      if (sortingKey.key_ != "row_number") {
+        projections.push_back(sortingKey.key_);
+      }
+    }
+    projections.push_back("row_number");
+    projections.push_back(fmt::format("{} as x", valueField));
+    projections.push_back(
+        fmt::format("{} as w", weighted ? weightField.value() : "1::bigint"));
+
+    PlanBuilder planBuilder;
+    planBuilder.values(input).project(projections).filter("w > 0");
+
+    auto partitionByKeysWithRowNumber =
+        getPartitionByClause(append(groupingKeys_, {"row_number"}));
+    planBuilder
+        .window({fmt::format(
+            "multimap_agg(x, w) over ({} {} {}) as bucket",
+            getPartitionByClause(groupingKeys_),
+            getOrderByClause(sortingKeysAndOrders),
+            getFrameClause(frame))})
+        .project(append(
+            groupingKeys_,
+            {"row_number",
+             "transform_values(bucket, (k, v) -> array_sum(v)) as bucket"}))
+        .unnest(append(groupingKeys_, {"row_number"}), {"bucket"})
+        .project(append(
+            groupingKeys_, {"row_number", "bucket_k", "bucket_v as weight"}))
+        .window(
+            {fmt::format(
+                 "sum(weight) over ({} order by bucket_k asc rows between unbounded preceding and unbounded following) as cnt",
+                 partitionByKeysWithRowNumber),
+             fmt::format(
+                 "sum(weight) over ({} order by bucket_k asc rows between unbounded preceding and 1 preceding) as order_min",
+                 partitionByKeysWithRowNumber),
+             fmt::format(
+                 "sum(weight) over ({} order by bucket_k asc rows between unbounded preceding and current row) as order_max",
+                 partitionByKeysWithRowNumber)})
+        .project(append(
+            groupingKeys_,
+            {"row_number",
+             "bucket_k as element",
+             "cnt",
+             "row_constructor(coalesce(order_min, 0), order_max) as order_pair"}))
+        .singleAggregation(
+            append(groupingKeys_, {"row_number"}),
+            {"map_agg(element, order_pair) as expected",
+             "arbitrary(cnt) as cnt"});
+    // Array percentiles: emit one row per (input row, percentile index).
+    if (isArray) {
+      std::stringstream ss;
+      toTypeSql(resultType->asArray().elementType(), ss);
+
+      planBuilder
+          .appendColumns(
+              {fmt::format("sequence(1, {}) as seq", percentiles_.size())})
+          .unnest(
+              append(groupingKeys_, {"row_number", "expected", "cnt"}), {"seq"})
+          .project(append(
+              groupingKeys_,
+              {"row_number",
+               fmt::format("cast(null as {}) as actual_e", ss.str()),
+               "seq_e as pct_index",
+               "expected",
+               "cnt"}));
+    } else {
+      std::stringstream ss;
+      toTypeSql(resultType, ss);
+
+      planBuilder.project(append(
+          groupingKeys_,
+          {"row_number",
+           fmt::format("cast(null as {}) as actual", ss.str()),
+           "expected",
+           "cnt"}));
+    }
+
+    auto plan = planBuilder.planNode();
+    return AssertQueryBuilder(plan).copyResults(input[0]->pool());
+  }
+
   static const std::string& fieldName(const core::TypedExprPtr& expression) {
     auto field = core::TypedExprs::asFieldAccess(expression);
     VELOX_CHECK_NOT_NULL(field);
@@ -252,8 +554,9 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
   // Extract 'percentile' argument.
   static std::vector<double> extractPercentiles(
       const std::vector<RowVectorPtr>& input,
-      const core::AggregationNode::Aggregate& aggregate) {
-    const auto args = aggregate.call->inputs();
+      const core::CallTypedExprPtr& functionCall,
+      bool& isArray) {
+    const auto args = functionCall->inputs();
     column_index_t percentileIndex = 1;
     if (args.size() >= 3 && args[1]->type()->isBigint()) {
       percentileIndex = 2;
@@ -263,9 +566,10 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
 
     if (auto constantExpr = core::TypedExprs::asConstant(percentileExpr)) {
       if (constantExpr->type()->isDouble()) {
+        isArray = false;
         return {constantExpr->value().value<double>()};
       }
-
+      isArray = true;
       return toList(constantExpr->valueVector());
     }
 
@@ -273,9 +577,10 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
 
     if (percentileVector->type()->isDouble()) {
       VELOX_CHECK(!percentileVector->isNullAt(0));
+      isArray = false;
       return {percentileVector->as<SimpleVector<double>>()->valueAt(0)};
     }
-
+    isArray = true;
     return toList(percentileVector);
   }
 
@@ -364,11 +669,145 @@ class ApproxPercentileResultVerifier : public ResultVerifier {
     return AssertQueryBuilder(plan).copyResults(result->pool());
   }
 
+  // For each row ([k1, k2,] x) in 'result', lookup min_pct and max_pct in
+  // 'allRanges_'. Return a vector of ([k1, k2,] x, min_pct, max_pct) rows.
+  // For example, for an actual result table 't2(c1, row_num, actual)' where c1
+  // is the partition-by key, this method essentially returns the result of the
+  // following query:
+  // SELECT
+  //     c1,
+  //     actual,
+  //     CAST(order_pair.order_min AS DOUBLE) / CAST(cnt AS DOUBLE),
+  //     CAST(order_pair.order_max AS DOUBLE) / CAST(cnt AS DOUBLE)
+  // FROM (
+  //     SELECT
+  //         c1,
+  //         actual,
+  //         expected,
+  //         value,
+  //         order_pair,
+  //         cnt
+  //     FROM (
+  //         SELECT
+  //             c1,
+  //             ARBITRARY(actual) AS actual,
+  //             ARBITRARY(expected) AS expected,
+  //             ARBITRARY(cnt) AS cnt
+  //         FROM (
+  //             SELECT
+  //                 *
+  //             FROM (
+  //                 VALUES
+  //                     (TRUE, 1, 1, NULL, NULL),
+  //                     (FALSE, 7, 3, NULL, NULL)
+  //             ) t2(c1, row_num, actual, expected, cnt)
+  //
+  //             UNION ALL
+  //
+  //             SELECT
+  //                 *
+  //             FROM allRanges_
+  //         )
+  //         GROUP BY
+  //             c1,
+  //             row_num
+  //     ) combined(c1, actual, expected, cnt)
+  //     CROSS JOIN UNNEST(expected) AS tmp(value, order_pair)
+  //     WHERE
+  //         value = actual
+  // )
+  RowVectorPtr getPercentileRangesForWindow(const RowVectorPtr& result) {
+    auto planNodeIdGenerator = std::make_shared<core::PlanNodeIdGenerator>();
+    // 'expectedSource' reads 'allRanges_'; 'actualSource' reads 'result'.
+    core::PlanNodePtr expectedSource;
+    core::PlanNodePtr actualSource;
+    core::PlanNodePtr plan;
+    expectedSource =
+        PlanBuilder(planNodeIdGenerator).values({allRanges_}).planNode();
+    auto expectedType = allRanges_->type()->asRow().findChild("expected");
+    std::stringstream ss;
+    toTypeSql(expectedType, ss);
+    auto expectedTypeSql = ss.str();
+    // Array results are unnested into one row per element before matching.
+    if (result->childAt(name_)->type()->isArray()) {
+      actualSource =
+          PlanBuilder(planNodeIdGenerator)
+              .values({result})
+              .project(append(
+                  groupingKeys_,
+                  {"row_number",
+                   fmt::format("{} as actual", name_),
+                   fmt::format("cast(null as {}) as expected", expectedTypeSql),
+                   "cast(null as bigint) as cnt"}))
+              .unnest(
+                  append(groupingKeys_, {"row_number", "expected", "cnt"}),
+                  {"actual"},
+                  "pct_index")
+              .project(append(
+                  groupingKeys_,
+                  {"row_number", "actual_e", "pct_index", "expected", "cnt"}))
+              .planNode();
+      // Merge both sources and keep the range whose key equals 'actual'.
+      plan =
+          PlanBuilder(planNodeIdGenerator)
+              .localPartition({}, {expectedSource, actualSource})
+              .singleAggregation(
+                  append(groupingKeys_, {"pct_index", "row_number"}),
+                  {"arbitrary(actual_e) as actual",
+                   "arbitrary(expected) as expected",
+                   "arbitrary(cnt) as cnt"})
+              .unnest(
+                  append(groupingKeys_, {"actual", "pct_index", "cnt"}),
+                  {"expected"})
+              .filter("actual = expected_k")
+              .project(append(
+                  groupingKeys_,
+                  {fmt::format("actual as {}", name_),
+                   "pct_index - 1 as pct_index",
+                   "cast(expected_v.c1 as double) / cast(cnt as double) as min_pct",
+                   "cast(expected_v.c2 as double) / cast(cnt as double) as max_pct"}))
+              .planNode();
+
+    } else {
+      actualSource =
+          PlanBuilder(planNodeIdGenerator)
+              .values({result})
+              .project(append(
+                  groupingKeys_,
+                  {"row_number",
+                   fmt::format("{} as actual", name_),
+                   fmt::format("cast(null as {}) as expected", expectedTypeSql),
+                   "cast(null as bigint) as cnt"}))
+              .planNode();
+      // Scalar result: a single percentile, so 'pct_index' is always 0.
+      plan =
+          PlanBuilder(planNodeIdGenerator)
+              .localPartition({}, {expectedSource, actualSource})
+              .singleAggregation(
+                  append(groupingKeys_, {"row_number"}),
+                  {"arbitrary(actual) as actual",
+                   "arbitrary(expected) as expected",
+                   "arbitrary(cnt) as cnt"})
+              .unnest(append(groupingKeys_, {"actual", "cnt"}), {"expected"})
+              .filter("actual = expected_k")
+              .project(append(
+                  groupingKeys_,
+                  {fmt::format("actual as {}", name_),
+                   "0 as pct_index",
+                   "cast(expected_v.c1 as double) / cast(cnt as double) as min_pct",
+                   "cast(expected_v.c2 as double) / cast(cnt as double) as max_pct"}))
+              .planNode();
+    }
+    return AssertQueryBuilder(plan).copyResults(result->pool());
+  }
+
   std::vector<std::string> groupingKeys_;
   std::string name_;
   std::vector<double> percentiles_;
   double accuracy_;
   RowVectorPtr allRanges_;
+  TypePtr resultType_; // NOTE(review): not referenced in the code shown here.
+  bool verifyWindow_{false}; // Set by initialize*(); selects verify() path.
 };
 
 } // namespace facebook::velox::exec::test
diff --git a/velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp b/velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp
index 00b5816367af..9a339a2d521f 100644
--- a/velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp
+++ b/velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp
@@ -24,6 +24,7 @@
 #include "velox/functions/prestosql/fuzzer/ApproxDistinctInputGenerator.h"
 #include "velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h"
 #include "velox/functions/prestosql/fuzzer/ApproxPercentileInputGenerator.h"
+#include "velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h"
 #include "velox/functions/prestosql/fuzzer/MinMaxInputGenerator.h"
 #include "velox/functions/prestosql/fuzzer/WindowOffsetInputGenerator.h"
 #include "velox/functions/prestosql/registration/RegistrationFunctions.h"
@@ -117,6 +118,7 @@ int main(int argc, char** argv) {
   // fields.
   // TODO: allow custom result verifiers.
   using facebook::velox::exec::test::ApproxDistinctResultVerifier;
+  using facebook::velox::exec::test::ApproxPercentileResultVerifier;
 
   static const std::unordered_map<
       std::string,
@@ -125,7 +127,8 @@ int main(int argc, char** argv) {
           // Approx functions.
           {"approx_distinct", std::make_shared<ApproxDistinctResultVerifier>()},
           {"approx_set", nullptr},
-          {"approx_percentile", nullptr},
+          {"approx_percentile",
+           std::make_shared<ApproxPercentileResultVerifier>()},
           {"approx_most_frequent", nullptr},
           {"merge", nullptr},
           // Semantically inconsistent functions