Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-1938] [SQL] ApproxCountDistinctMergeFunction should return Int value. #893

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ abstract class AggregateFunction
override def dataType = base.dataType

def update(input: Row): Unit
override def eval(input: Row): Any

// Do we really need this?
override def newInstance() = makeCopy(productIterator.map { case a: AnyRef => a }.toArray)
Expand Down Expand Up @@ -166,7 +165,7 @@ case class CountDistinct(expressions: Seq[Expression]) extends AggregateExpressi
override def children = expressions
override def references = expressions.flatMap(_.references).toSet
override def nullable = false
- override def dataType = IntegerType
+ override def dataType = LongType
override def toString = s"COUNT(DISTINCT ${expressions.mkString(",")})"
override def newInstance() = new CountDistinctFunction(expressions, this)
}
Expand All @@ -184,7 +183,7 @@ case class ApproxCountDistinctMerge(child: Expression, relativeSD: Double)
extends AggregateExpression with trees.UnaryNode[Expression] {
override def references = child.references
override def nullable = false
- override def dataType = IntegerType
+ override def dataType = LongType
override def toString = s"APPROXIMATE COUNT(DISTINCT $child)"
override def newInstance() = new ApproxCountDistinctMergeFunction(child, this, relativeSD)
}
Expand All @@ -193,7 +192,7 @@ case class ApproxCountDistinct(child: Expression, relativeSD: Double = 0.05)
extends PartialAggregate with trees.UnaryNode[Expression] {
override def references = child.references
override def nullable = false
- override def dataType = IntegerType
+ override def dataType = LongType
override def toString = s"APPROXIMATE COUNT(DISTINCT $child)"

override def asPartial: SplitEvaluation = {
Expand Down Expand Up @@ -391,7 +390,7 @@ case class CountDistinctFunction(expr: Seq[Expression], base: AggregateExpressio
}
}

- override def eval(input: Row): Any = seen.size
+ override def eval(input: Row): Any = seen.size.toLong
}

case class FirstFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction {
Expand Down