Skip to content

Commit

Permalink
[SQL] JavaDoc update for various DataFrame functions.
Browse files Browse the repository at this point in the history
Author: Reynold Xin <rxin@databricks.com>

Closes apache#5935 from rxin/df-doc1 and squashes the following commits:

aaeaadb [Reynold Xin] [SQL] JavaDoc update for various DataFrame functions.
  • Loading branch information
rxin authored and jeanlyn committed May 28, 2015
1 parent b4e3b1e commit d001700
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 21 deletions.
4 changes: 2 additions & 2 deletions sql/core/src/main/scala/org/apache/spark/sql/Column.scala
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,14 @@ class Column(protected[sql] val expr: Expression) extends Logging {

/**
* Inversion of boolean expression, i.e. NOT.
* {{
* {{{
* // Scala: select rows that are not active (isActive === false)
* df.filter( !df("isActive") )
*
* // Java:
* import static org.apache.spark.sql.functions.*;
* df.filter( not(df.col("isActive")) );
* }}
* }}}
*
* @group expr_ops
*/
Expand Down
16 changes: 8 additions & 8 deletions sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
Original file line number Diff line number Diff line change
Expand Up @@ -681,11 +681,11 @@ class DataFrame private[sql](

/**
* (Scala-specific) Aggregates on the entire [[DataFrame]] without groups.
* {{
* {{{
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
* df.agg("age" -> "max", "salary" -> "avg")
* df.groupBy().agg("age" -> "max", "salary" -> "avg")
* }}
* }}}
* @group dfops
*/
def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = {
Expand All @@ -694,33 +694,33 @@ class DataFrame private[sql](

/**
* (Scala-specific) Aggregates on the entire [[DataFrame]] without groups.
* {{
* {{{
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
* df.agg(Map("age" -> "max", "salary" -> "avg"))
* df.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
* }}
* }}}
* @group dfops
*/
def agg(exprs: Map[String, String]): DataFrame = {
  // Aggregating the whole frame is a group-by with no grouping columns.
  val ungrouped = groupBy()
  ungrouped.agg(exprs)
}

/**
* (Java-specific) Aggregates on the entire [[DataFrame]] without groups.
* {{
* {{{
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
* df.agg(Map("age" -> "max", "salary" -> "avg"))
* df.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
* }}
* }}}
* @group dfops
*/
def agg(exprs: java.util.Map[String, String]): DataFrame = {
  // Aggregating the whole frame is a group-by with no grouping columns.
  val ungrouped = groupBy()
  ungrouped.agg(exprs)
}

/**
* Aggregates on the entire [[DataFrame]] without groups.
* {{
* {{{
* // df.agg(...) is a shorthand for df.groupBy().agg(...)
* df.agg(max($"age"), avg($"salary"))
* df.groupBy().agg(max($"age"), avg($"salary"))
* }}
* }}}
* @group dfops
*/
@scala.annotation.varargs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
StatFunctions.calculateCov(df, Seq(col1, col2))
}

/*
/**
* Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson
* Correlation Coefficient. For Spearman Correlation, consider using RDD methods found in
* MLlib's Statistics.
Expand Down Expand Up @@ -75,7 +75,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
* each row.
* @param col2 The name of the second column. Distinct items will make the column names
* of the DataFrame.
* @return A Local DataFrame containing the table
* @return A DataFrame containing the contingency table.
*/
def crosstab(col1: String, col2: String): DataFrame = {
StatFunctions.crossTabulate(df, col1, col2)
Expand Down Expand Up @@ -110,14 +110,25 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) {
}

/**
* Python friendly implementation for `freqItems`
* (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
* [[http://dx.doi.org/10.1145/762471.762473 here]], proposed by Karp, Schenker, and Papadimitriou.
*
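* @param cols the names of the columns to search frequent items in.
* @param support the support threshold handed to the frequent-items algorithm (the
*                single-argument overload uses 0.01, i.e. 1%).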
* @return A Local DataFrame with the Array of frequent items for each column.
*/
def freqItems(cols: Seq[String], support: Double): DataFrame =
  // Hands the column names and support threshold straight to the single-pass implementation.
  FrequentItems.singlePassFreqItems(df, cols, support)

/**
* Python friendly implementation for `freqItems` with a default `support` of 1%.
* (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the
* frequent element count algorithm described in
* [[http://dx.doi.org/10.1145/762471.762473 here]], proposed by Karp, Schenker, and Papadimitriou.
* Uses a default `support` of 1%.
*
* @param cols the names of the columns to search frequent items in.
* @return A Local DataFrame with the Array of frequent items for each column.
*/
def freqItems(cols: Seq[String]): DataFrame = {
FrequentItems.singlePassFreqItems(df, cols, 0.01)
Expand Down
14 changes: 7 additions & 7 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import scala.reflect.runtime.universe.{TypeTag, typeTag}

import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, Star}
import org.apache.spark.sql.catalyst.analysis.{UnresolvedFunction, Star}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.mathfuncs._
import org.apache.spark.sql.types._
Expand Down Expand Up @@ -86,21 +86,21 @@ object functions {

/**
* Returns a sort expression based on ascending order of the column.
* {{
* {{{
* // Sort by dept in ascending order, and then age in descending order.
* df.sort(asc("dept"), desc("age"))
* }}
* }}}
*
* @group sort_funcs
*/
def asc(columnName: String): Column = {
  // Resolve the column by name, then request its ascending sort expression.
  val column = Column(columnName)
  column.asc
}

/**
* Returns a sort expression based on the descending order of the column.
* {{
* {{{
* // Sort by dept in ascending order, and then age in descending order.
* df.sort(asc("dept"), desc("age"))
* }}
* }}}
*
* @group sort_funcs
*/
Expand Down Expand Up @@ -353,13 +353,13 @@ object functions {

/**
* Inversion of boolean expression, i.e. NOT.
* {{
* {{{
* // Scala: select rows that are not active (isActive === false)
* df.filter( !df("isActive") )
*
* // Java:
* df.filter( not(df.col("isActive")) );
* }}
* }}}
*
* @group normal_funcs
*/
Expand Down

0 comments on commit d001700

Please sign in to comment.