From 9b79246226faf962b489570f864e49f953cbb63c Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Wed, 26 Mar 2014 00:01:39 -0700
Subject: [PATCH] Updated doc for Limit.

---
 .../org/apache/spark/sql/execution/basicOperators.scala   | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
index b815e1f2cd2fd..4b767e35ea32a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala
@@ -63,12 +63,14 @@ case class Union(children: Seq[SparkPlan])(@transient sc: SparkContext) extends
 }
 
 /**
- * Take the first limit elements.
+ * Take the first limit elements. Note that the implementation is different depending on whether
+ * this is a terminal operator or not. If it is terminal and is invoked using executeCollect,
+ * this operator uses Spark's take method on the Spark driver. If it is not terminal or is
+ * invoked using execute, we first take the limit on each partition, and then repartition all the
+ * data to a single partition to compute the global limit.
  */
 case class Limit(limit: Int, child: SparkPlan)(@transient sc: SparkContext) extends UnaryNode {
   override def otherCopyArgs = sc :: Nil
-  // Note that the implementation is different depending on
-  // whether this is a terminal operator or not.
   override def output = child.output