Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-1065] fix on count distinct w/ keys (#1090)
Browse files Browse the repository at this point in the history
* fix on count distinct w/ keys

This patch fixes the hashagg w/ filter w/ groupby keys

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

* fix

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

* fix format

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>

Signed-off-by: Yuan Zhou <yuan.zhou@intel.com>
  • Loading branch information
zhouyuan authored Sep 6, 2022
1 parent 918f2d6 commit 21d2a0b
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,21 @@ class NativeSQLConvertedSuite extends QueryTest
checkAnswer(df, Seq(Row(2, 4)))
}

test("2 count distinct with group by") {
Seq[(Integer, Integer)](
(1, 1),
(1, 2),
(2, 1),
(2, 2),
(3, 1),
(3, 3))
.toDF("a", "b")
.createOrReplaceTempView("testData")
val df = sql(
"SELECT COUNT(DISTINCT (CASE WHEN b > 1 THEN 1 END)), COUNT(DISTINCT (CASE WHEN b > 2 THEN 1 END)) FROM testData group by a")
checkAnswer(df, Seq(Row(1, 0), Row(1, 0), Row(1, 1)))
}

test("left anti - 1") {
Seq[(java.lang.Long, Double)](
(null, 1.0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -473,14 +473,32 @@ class CountDistinctAction : public ActionBase {
cache_.resize(max_group_id + 1, 0);
length_ = cache_.size();
}

in_list_ = in_list;
row_id = 0;
assert(in_list.size() > 1);
int gid = in_list.size() - 1;
typed_key_in = std::dynamic_pointer_cast<arrow::BooleanArray>(in_list[gid]);
// prepare evaluate lambda
*on_valid = [this](int dest_group_id) {
cache_[dest_group_id] += 1;
bool foundNull = false;
for (int colId = 0; colId < in_list_.size() - 1; colId++) {
if (in_list_[colId]->IsNull(row_id)) {
foundNull = true;
break;
}
}
if (!foundNull) {
cache_[dest_group_id] += 1;
}

row_id++;
return arrow::Status::OK();
};

*on_null = [this]() { return arrow::Status::OK(); };
*on_null = [this]() {
row_id++;
return arrow::Status::OK();
};
return arrow::Status::OK();
}

Expand Down Expand Up @@ -587,13 +605,16 @@ class CountDistinctAction : public ActionBase {
using ResBuilderType = typename arrow::TypeTraits<DataType>::BuilderType;
// input
arrow::compute::ExecContext* ctx_;
int row_id;
// for debug only
int32_t localGid_ = -1;
// result
using CType = typename arrow::TypeTraits<DataType>::CType;
std::vector<CType> cache_;
std::unique_ptr<ResBuilderType> builder_;
uint64_t length_ = 0;
std::shared_ptr<arrow::BooleanArray> typed_key_in;
ArrayList in_list_;
};

//////////////// CountLiteralAction ///////////////
Expand Down

0 comments on commit 21d2a0b

Please sign in to comment.