From f85e21d052b071377d11696f565153358e33a112 Mon Sep 17 00:00:00 2001 From: Wei He Date: Mon, 8 Jul 2024 20:21:04 -0700 Subject: [PATCH] Extend ApproxPercentileResultVerifier for window fuzzer (#10367) Summary: Add initializeWindow() to ApproxPercentileResultVerifier so the window fuzzer can verify approx_percentile results, pass the window's sorting keys and orders to ResultVerifier::initializeWindow(), move SortingKeyAndOrder from WindowFuzzer.h to FuzzerUtil.h, and register the verifier for approx_percentile in WindowFuzzerTest.cpp. Pull Request resolved: https://github.com/facebookincubator/velox/pull/10367 Reviewed By: kevinwilfong Differential Revision: D59257657 --- velox/exec/fuzzer/FuzzerUtil.h | 10 + velox/exec/fuzzer/ResultVerifier.h | 2 + velox/exec/fuzzer/WindowFuzzer.cpp | 19 +- velox/exec/fuzzer/WindowFuzzer.h | 9 +- .../fuzzer/ApproxDistinctResultVerifier.h | 1 + .../fuzzer/ApproxPercentileResultVerifier.h | 479 +++++++++++++++++- .../prestosql/fuzzer/WindowFuzzerTest.cpp | 5 +- 7 files changed, 492 insertions(+), 33 deletions(-) diff --git a/velox/exec/fuzzer/FuzzerUtil.h b/velox/exec/fuzzer/FuzzerUtil.h index 81efca5f9023..09c00c66a033 100644 --- a/velox/exec/fuzzer/FuzzerUtil.h +++ b/velox/exec/fuzzer/FuzzerUtil.h @@ -13,12 +13,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#pragma once +#include "velox/core/PlanNode.h" #include "velox/exec/Split.h" namespace facebook::velox::exec::test { const std::string kHiveConnectorId = "test-hive"; +struct SortingKeyAndOrder { + const std::string key_; + const core::SortOrder sortOrder_; + + SortingKeyAndOrder(std::string key, core::SortOrder sortOrder) + : key_(std::move(key)), sortOrder_(std::move(sortOrder)) {} +}; + /// Write the vector to the path. 
void writeToFile( const std::string& path, diff --git a/velox/exec/fuzzer/ResultVerifier.h b/velox/exec/fuzzer/ResultVerifier.h index 36cf9c171ac2..bda5ccecc7f1 100644 --- a/velox/exec/fuzzer/ResultVerifier.h +++ b/velox/exec/fuzzer/ResultVerifier.h @@ -18,6 +18,7 @@ #include #include "velox/core/PlanNode.h" +#include "velox/exec/fuzzer/FuzzerUtil.h" #include "velox/vector/ComplexVector.h" namespace facebook::velox::exec::test { @@ -61,6 +62,7 @@ class ResultVerifier { virtual void initializeWindow( const std::vector& /*input*/, const std::vector& /*partitionByKeys*/, + const std::vector& /*sortingKeysAndOrders*/, const core::WindowNode::Function& /*function*/, const std::string& /*frame*/, const std::string& /*windowName*/) { diff --git a/velox/exec/fuzzer/WindowFuzzer.cpp b/velox/exec/fuzzer/WindowFuzzer.cpp index 8f9795534587..734e87a702ac 100644 --- a/velox/exec/fuzzer/WindowFuzzer.cpp +++ b/velox/exec/fuzzer/WindowFuzzer.cpp @@ -177,8 +177,7 @@ std::string WindowFuzzer::getFrame( return frame.str(); } -std::vector -WindowFuzzer::generateSortingKeysAndOrders( +std::vector WindowFuzzer::generateSortingKeysAndOrders( const std::string& prefix, std::vector& names, std::vector& types) { @@ -359,11 +358,17 @@ void initializeVerifier( const std::shared_ptr& customVerifier, const std::vector& input, const std::vector& partitionKeys, + const std::vector& sortingKeysAndOrders, const std::string& frame) { const auto& windowNode = std::dynamic_pointer_cast(plan); customVerifier->initializeWindow( - input, partitionKeys, windowNode->windowFunctions()[0], frame, "w0"); + input, + partitionKeys, + sortingKeysAndOrders, + windowNode->windowFunctions()[0], + frame, + "w0"); } } // namespace @@ -424,7 +429,13 @@ bool WindowFuzzer::verifyWindow( VELOX_CHECK( customVerifier->supportsVerify(), "Window fuzzer only uses custom verify() methods."); - initializeVerifier(plan, customVerifier, input, partitionKeys, frame); + initializeVerifier( + plan, + customVerifier, + input, + 
partitionKeys, + sortingKeysAndOrders, + frame); customVerifier->verify(resultOrError.result); } } diff --git a/velox/exec/fuzzer/WindowFuzzer.h b/velox/exec/fuzzer/WindowFuzzer.h index bf36de8a9e3d..f53e26098fb4 100644 --- a/velox/exec/fuzzer/WindowFuzzer.h +++ b/velox/exec/fuzzer/WindowFuzzer.h @@ -18,6 +18,7 @@ #include "velox/exec/Aggregate.h" #include "velox/exec/WindowFunction.h" #include "velox/exec/fuzzer/AggregationFuzzerBase.h" +#include "velox/exec/fuzzer/FuzzerUtil.h" #include "velox/exec/fuzzer/PrestoQueryRunner.h" #include "velox/exec/fuzzer/ReferenceQueryRunner.h" #include "velox/vector/fuzzer/VectorFuzzer.h" @@ -76,14 +77,6 @@ class WindowFuzzer : public AggregationFuzzerBase { void go(const std::string& planPath); private: - struct SortingKeyAndOrder { - const std::string key_; - const core::SortOrder sortOrder_; - - SortingKeyAndOrder(std::string key, core::SortOrder sortOrder) - : key_(std::move(key)), sortOrder_(std::move(sortOrder)) {} - }; - void addWindowFunctionSignatures(const WindowFunctionMap& signatureMap); // Return a randomly generated frame clause string together with a boolean diff --git a/velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h b/velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h index 95295010aa19..32c48d84d226 100644 --- a/velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h +++ b/velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h @@ -64,6 +64,7 @@ class ApproxDistinctResultVerifier : public ResultVerifier { void initializeWindow( const std::vector& input, const std::vector& partitionByKeys, + const std::vector& /*sortingKeysAndOrders*/, const core::WindowNode::Function& function, const std::string& frame, const std::string& windowName) override { diff --git a/velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h b/velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h index 6059f112e312..0ef745d8d50b 100644 --- 
a/velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h +++ b/velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h @@ -45,26 +45,19 @@ class ApproxPercentileResultVerifier : public ResultVerifier { const core::AggregationNode::Aggregate& aggregate, const std::string& aggregateName) override { VELOX_CHECK(!input.empty()); + verifyWindow_ = false; int64_t numInputs = 0; for (const auto& v : input) { numInputs += v->size(); } - const auto& args = aggregate.call->inputs(); - const auto& valueField = fieldName(args[0]); - std::optional weightField; - if (args.size() >= 3 && args[1]->type()->isBigint()) { - weightField = fieldName(args[1]); - } - groupingKeys_ = groupingKeys; name_ = aggregateName; - percentiles_ = extractPercentiles(input, aggregate); - VELOX_CHECK(!percentiles_.empty()); - - accuracy_ = extractAccuracy(aggregate, input[0]); + const auto& [valueField, weightField] = + extractValueAndWeight(aggregate.call); + extractPercentileAndAccuracy(aggregate.call, input); // Compute percentiles for all values. 
allRanges_ = @@ -72,6 +65,34 @@ class ApproxPercentileResultVerifier : public ResultVerifier { VELOX_CHECK_LE(allRanges_->size(), numInputs); } + void initializeWindow( + const std::vector& input, + const std::vector& partitionByKeys, + const std::vector& sortingKeysAndOrders, + const core::WindowNode::Function& function, + const std::string& /*frame*/, + const std::string& windowName) override { + VELOX_CHECK(!input.empty()); + verifyWindow_ = true; + + groupingKeys_ = partitionByKeys; + name_ = windowName; + + const auto& [valueField, weightField] = + extractValueAndWeight(function.functionCall); + bool isArrayPercentile = + extractPercentileAndAccuracy(function.functionCall, input); + + allRanges_ = computePercentilesForWindow( + input, + valueField, + weightField, + sortingKeysAndOrders, + function.frame, + function.functionCall->type(), + isArrayPercentile); + } + bool compare( const RowVectorPtr& /*result*/, const RowVectorPtr& /*altResult*/) override { @@ -80,21 +101,27 @@ class ApproxPercentileResultVerifier : public ResultVerifier { bool verify(const RowVectorPtr& result) override { // Compute acceptable ranges of percentiles for each value in 'result'. - auto ranges = getPercentileRanges(result); - // VELOX_CHECK_EQ(ranges->size(), result->size() * percentiles_.size()); + RowVectorPtr ranges; + if (verifyWindow_) { + ranges = getPercentileRangesForWindow(result); + } else { + ranges = getPercentileRanges(result); + } auto& value = ranges->childAt(name_); auto* minPct = ranges->childAt("min_pct")->as>(); auto* maxPct = ranges->childAt("max_pct")->as>(); auto* pctIndex = ranges->childAt("pct_index")->as>(); + // Number of non-null rows in the actual result. 
+ auto numNonNull = 0; for (auto i = 0; i < ranges->size(); ++i) { if (value->isNullAt(i)) { VELOX_CHECK(minPct->isNullAt(i)); VELOX_CHECK(maxPct->isNullAt(i)); continue; } - + numNonNull++; VELOX_CHECK(!minPct->isNullAt(i)); VELOX_CHECK(!maxPct->isNullAt(i)); VELOX_CHECK(!pctIndex->isNullAt(i)); @@ -106,6 +133,13 @@ class ApproxPercentileResultVerifier : public ResultVerifier { return false; } } + if (verifyWindow_ && numNonNull != allRanges_->size()) { + LOG(ERROR) << fmt::format( + "Expected result contains {} non-null rows while the actual result contains {}.", + allRanges_->size(), + numNonNull); + return false; + } return true; } @@ -117,10 +151,36 @@ class ApproxPercentileResultVerifier : public ResultVerifier { private: static constexpr double kDefaultAccuracy = 0.0133; + // Extracts a pair of [valueField, weightField] from functionCall. weightField + // is an optional. + std::pair> extractValueAndWeight( + const core::CallTypedExprPtr& functionCall) { + const auto& args = functionCall->inputs(); + const auto& valueField = fieldName(args[0]); + std::optional weightField; + if (args.size() >= 3 && args[1]->type()->isBigint()) { + weightField = fieldName(args[1]); + } + return std::make_pair(valueField, weightField); + } + + // Extracts the percentile(s) to percentiles_ and the accuracy to accuracy_. + // Returns a boolean indicating whether the percentile is an array. 
+ bool extractPercentileAndAccuracy( + const core::CallTypedExprPtr& functionCall, + const std::vector& input) { + bool isArrayPercentile; + percentiles_ = extractPercentiles(input, functionCall, isArrayPercentile); + VELOX_CHECK(!percentiles_.empty()); + + accuracy_ = extractAccuracy(functionCall, input[0]); + return isArrayPercentile; + } + static double extractAccuracy( - const core::AggregationNode::Aggregate& aggregate, + const core::CallTypedExprPtr& functionCall, const RowVectorPtr& input) { - const auto& args = aggregate.call->inputs(); + const auto& args = functionCall->inputs(); column_index_t accuracyIndex = 2; if (args.size() >= 3 && args[1]->type()->isBigint()) { @@ -243,6 +303,248 @@ class ApproxPercentileResultVerifier : public ResultVerifier { return AssertQueryBuilder(plan).copyResults(input[0]->pool()); } + std::string getFrameClause(const core::WindowNode::Frame& frame) { + std::stringstream ss; + ss << core::WindowNode::windowTypeName(frame.type) << " between "; + if (frame.startValue) { + ss << frame.startValue->toString() << " "; + } + ss << core::WindowNode::boundTypeName(frame.startType) << " and "; + if (frame.endValue) { + ss << frame.endValue->toString() << " "; + } + ss << core::WindowNode::boundTypeName(frame.endType); + return ss.str(); + } + + std::string getOrderByClause( + const std::vector& sortingKeysAndOrders) { + if (sortingKeysAndOrders.empty()) { + return ""; + } + std::stringstream ss; + ss << "order by "; + for (auto i = 0; i < sortingKeysAndOrders.size(); ++i) { + if (i > 0) { + ss << ", "; + } + ss << sortingKeysAndOrders[i].key_ << " " + << sortingKeysAndOrders[i].sortOrder_.toString(); + } + return ss.str(); + } + + std::string getPartitionByClause( + const std::vector& partitionByKeys) { + if (partitionByKeys.empty()) { + return ""; + } + return "partition by " + folly::join(", ", partitionByKeys); + } + + // For each input row, calculates a map of {value : [order_min, order_max]} as + // 'expected' for every distinct 
value in the window frame of the current row, + // and the weighted total number of values in the frame as 'cnt'. 'order_min' + // is the rank right before the first appearance of 'value' when values in the + // current frame are sorted by sortingKeysAndOrders and 'order_max' is the + // rank of the last appearance of 'value'. For example, for a table 't(c0, c1, + // c2, weight, row_num)' and a window operation 'approx_percentile(c0, + // percentile) over (partition by c1 order by c2 desc rows between 1 preceding + // and 1 following)', this method essentially returns the result of the + // following query: + // SELECT + // c1, + // row_num, + // NULL AS actual, + // MAP_AGG(bucket_element, order_pair) AS expected, + // ARBITRARY(weight_total) AS cnt + // FROM ( + // SELECT + // c1, + // row_num, + // bucket_element, + // CAST( + // ROW(COALESCE(order_min, 0), order_max) AS ROW( + // order_min BIGINT, + // order_max BIGINT + // ) + // ) AS order_pair, + // weight_total + // FROM ( + // SELECT + // c1, + // row_num, + // bucket_element, + // weight, + // SUM(weight) OVER ( + // PARTITION BY + // c1, + // row_num + // ORDER BY + // bucket_element ASC + // ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + // ) AS weight_total, + // SUM(weight) OVER ( + // PARTITION BY + // c1, + // row_num + // ORDER BY + // bucket_element ASC + // ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING + // ) AS order_min, + // SUM(weight) OVER ( + // PARTITION BY + // c1, + // row_num + // ORDER BY + // bucket_element ASC + // ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + // ) AS order_max + // FROM ( + // SELECT + // c1, + // row_num, + // bucket_element, + // bucket_weight AS weight + // FROM ( + // SELECT + // c1, + // row_num, + // bucket_element, + // bucket_weight + // FROM ( + // SELECT + // c1, + // row_num, + // TRANSFORM_VALUES(bucket, (k, v) -> ARRAY_SUM(v)) + // AS bucket + // FROM ( + // SELECT + // c1, + // row_num, + // MULTIMAP_AGG(c0, weight) OVER ( + // 
PARTITION BY + // c1 + // ORDER BY + // c2 DESC + // ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING + // ) AS bucket + // FROM ( + // VALUES + // (1, TRUE, 1, 2, 1), + // (2, TRUE, 3, 1, 2), + // (1, TRUE, 4, 1, 3), + // (4, TRUE, 5, 1, 4), + // (1, FALSE, 1, 2, 5), + // (2, FALSE, 3, 1, 6), + // (3, FALSE, 4, 1, 7), + // (4, FALSE, 5, 1, 8) + // ) t(c0, c1, c2, weight, row_num) + // ) + // ) bucketed(c1, row_num, bucket) + // CROSS JOIN UNNEST(bucket) AS tmp(bucket_element, + // bucket_weight) + // ) + // ) + // ) + // ) + // GROUP BY + // c1, + // row_num + RowVectorPtr computePercentilesForWindow( + const std::vector& input, + const std::string& valueField, + const std::optional& weightField, + const std::vector& sortingKeysAndOrders, + const core::WindowNode::Frame& frame, + const TypePtr& resultType, + bool isArray) { + VELOX_CHECK(!input.empty()) + const auto rowType = asRowType(input[0]->type()); + const bool weighted = weightField.has_value(); + + std::vector projections = groupingKeys_; + for (const auto& sortingKey : sortingKeysAndOrders) { + if (sortingKey.key_ != "row_number") { + projections.push_back(sortingKey.key_); + } + } + projections.push_back("row_number"); + projections.push_back(fmt::format("{} as x", valueField)); + projections.push_back( + fmt::format("{} as w", weighted ? 
weightField.value() : "1::bigint")); + + PlanBuilder planBuilder; + planBuilder.values(input).project(projections).filter("w > 0"); + + auto partitionByKeysWithRowNumber = + getPartitionByClause(append(groupingKeys_, {"row_number"})); + planBuilder + .window({fmt::format( + "multimap_agg(x, w) over ({} {} {}) as bucket", + getPartitionByClause(groupingKeys_), + getOrderByClause(sortingKeysAndOrders), + getFrameClause(frame))}) + .project(append( + groupingKeys_, + {"row_number", + "transform_values(bucket, (k, v) -> array_sum(v)) as bucket"})) + .unnest(append(groupingKeys_, {"row_number"}), {"bucket"}) + .project(append( + groupingKeys_, {"row_number", "bucket_k", "bucket_v as weight"})) + .window( + {fmt::format( + "sum(weight) over ({} order by bucket_k asc rows between unbounded preceding and unbounded following) as cnt", + partitionByKeysWithRowNumber), + fmt::format( + "sum(weight) over ({} order by bucket_k asc rows between unbounded preceding and 1 preceding) as order_min", + partitionByKeysWithRowNumber), + fmt::format( + "sum(weight) over ({} order by bucket_k asc rows between unbounded preceding and current row) as order_max", + partitionByKeysWithRowNumber)}) + .project(append( + groupingKeys_, + {"row_number", + "bucket_k as element", + "cnt", + "row_constructor(coalesce(order_min, 0), order_max) as order_pair"})) + .singleAggregation( + append(groupingKeys_, {"row_number"}), + {"map_agg(element, order_pair) as expected", + "arbitrary(cnt) as cnt"}); + + if (isArray) { + std::stringstream ss; + toTypeSql(resultType->asArray().elementType(), ss); + + planBuilder + .appendColumns( + {fmt::format("sequence(1, {}) as seq", percentiles_.size())}) + .unnest( + append(groupingKeys_, {"row_number", "expected", "cnt"}), {"seq"}) + .project(append( + groupingKeys_, + {"row_number", + fmt::format("cast(null as {}) as actual_e", ss.str()), + "seq_e as pct_index", + "expected", + "cnt"})); + } else { + std::stringstream ss; + toTypeSql(resultType, ss); + + 
planBuilder.project(append( + groupingKeys_, + {"row_number", + fmt::format("cast(null as {}) as actual", ss.str()), + "expected", + "cnt"})); + } + + auto plan = planBuilder.planNode(); + return AssertQueryBuilder(plan).copyResults(input[0]->pool()); + } + static const std::string& fieldName(const core::TypedExprPtr& expression) { auto field = core::TypedExprs::asFieldAccess(expression); VELOX_CHECK_NOT_NULL(field); @@ -252,8 +554,9 @@ class ApproxPercentileResultVerifier : public ResultVerifier { // Extract 'percentile' argument. static std::vector extractPercentiles( const std::vector& input, - const core::AggregationNode::Aggregate& aggregate) { - const auto args = aggregate.call->inputs(); + const core::CallTypedExprPtr& functionCall, + bool& isArray) { + const auto args = functionCall->inputs(); column_index_t percentileIndex = 1; if (args.size() >= 3 && args[1]->type()->isBigint()) { percentileIndex = 2; @@ -263,9 +566,10 @@ class ApproxPercentileResultVerifier : public ResultVerifier { if (auto constantExpr = core::TypedExprs::asConstant(percentileExpr)) { if (constantExpr->type()->isDouble()) { + isArray = false; return {constantExpr->value().value()}; } - + isArray = true; return toList(constantExpr->valueVector()); } @@ -273,9 +577,10 @@ class ApproxPercentileResultVerifier : public ResultVerifier { if (percentileVector->type()->isDouble()) { VELOX_CHECK(!percentileVector->isNullAt(0)); + isArray = false; return {percentileVector->as>()->valueAt(0)}; } - + isArray = true; return toList(percentileVector); } @@ -364,11 +669,145 @@ class ApproxPercentileResultVerifier : public ResultVerifier { return AssertQueryBuilder(plan).copyResults(result->pool()); } + // For each row ([k1, k2,] x) in 'result', look up min_pct and max_pct in + // 'allRanges_'. Return a vector of ([k1, k2,] x, min_pct, max_pct) rows. 
+ // For example, for an actual result table 't2(c1, row_num, actual)' where c1 + // is the partition-by key, this method essentially returns the result of the + // following query: + // SELECT + // c1, + // actual, + // CAST(order_pair.order_min AS DOUBLE) / CAST(cnt AS DOUBLE), + // CAST(order_pair.order_max AS DOUBLE) / CAST(cnt AS DOUBLE) + // FROM ( + // SELECT + // c1, + // actual, + // expected, + // value, + // order_pair, + // cnt + // FROM ( + // SELECT + // c1, + // ARBITRARY(actual) AS actual, + // ARBITRARY(expected) AS expected, + // ARBITRARY(cnt) AS cnt + // FROM ( + // SELECT + // * + // FROM ( + // VALUES + // (TRUE, 1, 1, NULL, NULL), + // (FALSE, 7, 3, NULL, NULL) + // ) t2(c1, row_num, actual, expected, cnt) + // + // UNION ALL + // + // SELECT + // * + // FROM allRanges_ + // ) + // GROUP BY + // c1, + // row_num + // ) combined(c1, actual, expected, cnt) + // CROSS JOIN UNNEST(expected) AS tmp(value, order_pair) + // WHERE + // value = actual + // ) + RowVectorPtr getPercentileRangesForWindow(const RowVectorPtr& result) { + auto planNodeIdGenerator = std::make_shared(); + + core::PlanNodePtr expectedSource; + core::PlanNodePtr actualSource; + core::PlanNodePtr plan; + expectedSource = + PlanBuilder(planNodeIdGenerator).values({allRanges_}).planNode(); + auto expectedType = allRanges_->type()->asRow().findChild("expected"); + std::stringstream ss; + toTypeSql(expectedType, ss); + auto expectedTypeSql = ss.str(); + + if (result->childAt(name_)->type()->isArray()) { + actualSource = + PlanBuilder(planNodeIdGenerator) + .values({result}) + .project(append( + groupingKeys_, + {"row_number", + fmt::format("{} as actual", name_), + fmt::format("cast(null as {}) as expected", expectedTypeSql), + "cast(null as bigint) as cnt"})) + .unnest( + append(groupingKeys_, {"row_number", "expected", "cnt"}), + {"actual"}, + "pct_index") + .project(append( + groupingKeys_, + {"row_number", "actual_e", "pct_index", "expected", "cnt"})) + .planNode(); + + plan = + 
PlanBuilder(planNodeIdGenerator) + .localPartition({}, {expectedSource, actualSource}) + .singleAggregation( + append(groupingKeys_, {"pct_index", "row_number"}), + {"arbitrary(actual_e) as actual", + "arbitrary(expected) as expected", + "arbitrary(cnt) as cnt"}) + .unnest( + append(groupingKeys_, {"actual", "pct_index", "cnt"}), + {"expected"}) + .filter("actual = expected_k") + .project(append( + groupingKeys_, + {fmt::format("actual as {}", name_), + "pct_index - 1 as pct_index", + "cast(expected_v.c1 as double) / cast(cnt as double) as min_pct", + "cast(expected_v.c2 as double) / cast(cnt as double) as max_pct"})) + .planNode(); + + } else { + actualSource = + PlanBuilder(planNodeIdGenerator) + .values({result}) + .project(append( + groupingKeys_, + {"row_number", + fmt::format("{} as actual", name_), + fmt::format("cast(null as {}) as expected", expectedTypeSql), + "cast(null as bigint) as cnt"})) + .planNode(); + + plan = + PlanBuilder(planNodeIdGenerator) + .localPartition({}, {expectedSource, actualSource}) + .singleAggregation( + append(groupingKeys_, {"row_number"}), + {"arbitrary(actual) as actual", + "arbitrary(expected) as expected", + "arbitrary(cnt) as cnt"}) + .unnest(append(groupingKeys_, {"actual", "cnt"}), {"expected"}) + .filter("actual = expected_k") + .project(append( + groupingKeys_, + {fmt::format("actual as {}", name_), + "0 as pct_index", + "cast(expected_v.c1 as double) / cast(cnt as double) as min_pct", + "cast(expected_v.c2 as double) / cast(cnt as double) as max_pct"})) + .planNode(); + } + return AssertQueryBuilder(plan).copyResults(result->pool()); + } + std::vector groupingKeys_; std::string name_; std::vector percentiles_; double accuracy_; RowVectorPtr allRanges_; + TypePtr resultType_; + bool verifyWindow_; }; } // namespace facebook::velox::exec::test diff --git a/velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp b/velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp index 00b5816367af..9a339a2d521f 100644 --- 
a/velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp +++ b/velox/functions/prestosql/fuzzer/WindowFuzzerTest.cpp @@ -24,6 +24,7 @@ #include "velox/functions/prestosql/fuzzer/ApproxDistinctInputGenerator.h" #include "velox/functions/prestosql/fuzzer/ApproxDistinctResultVerifier.h" #include "velox/functions/prestosql/fuzzer/ApproxPercentileInputGenerator.h" +#include "velox/functions/prestosql/fuzzer/ApproxPercentileResultVerifier.h" #include "velox/functions/prestosql/fuzzer/MinMaxInputGenerator.h" #include "velox/functions/prestosql/fuzzer/WindowOffsetInputGenerator.h" #include "velox/functions/prestosql/registration/RegistrationFunctions.h" @@ -117,6 +118,7 @@ int main(int argc, char** argv) { // fields. // TODO: allow custom result verifiers. using facebook::velox::exec::test::ApproxDistinctResultVerifier; + using facebook::velox::exec::test::ApproxPercentileResultVerifier; static const std::unordered_map< std::string, @@ -125,7 +127,8 @@ int main(int argc, char** argv) { // Approx functions. {"approx_distinct", std::make_shared()}, {"approx_set", nullptr}, - {"approx_percentile", nullptr}, + {"approx_percentile", + std::make_shared()}, {"approx_most_frequent", nullptr}, {"merge", nullptr}, // Semantically inconsistent functions