From 7afff0ce204d37d8074cc6e74048f32144e85fca Mon Sep 17 00:00:00 2001 From: Lottie Bryant Date: Wed, 12 Apr 2023 16:38:33 +0100 Subject: [PATCH 1/2] Issue-235: WIP Fix Docs --- core/src/main/scala/doric/sem/JoinOps.scala | 2 +- core/src/main/scala/doric/syntax/AggregationColumns.scala | 2 +- core/src/main/scala/doric/syntax/ArrayColumns.scala | 5 +++-- core/src/main/scala/doric/syntax/BinaryColumns.scala | 2 +- core/src/main/scala/doric/syntax/BooleanColumns.scala | 2 +- core/src/main/scala/doric/syntax/CommonColumns.scala | 2 +- core/src/main/scala/doric/syntax/ControlStructures.scala | 2 +- core/src/main/scala/doric/syntax/DStructs.scala | 2 +- core/src/main/scala/doric/syntax/DateColumns.scala | 2 +- core/src/main/scala/doric/syntax/LiteralConversions.scala | 2 +- core/src/main/scala/doric/syntax/MapColumns.scala | 2 +- core/src/main/scala/doric/syntax/NumericColumns.scala | 2 +- core/src/main/scala/doric/syntax/StringColumns.scala | 2 +- core/src/main/scala/doric/syntax/TimestampColumns.scala | 2 +- core/src/main/scala/doric/syntax/package.scala | 4 ++-- .../scala/doric/syntax/BinaryColumns30_31.scala | 2 +- .../scala/doric/syntax/NumericColumns2_31.scala | 2 +- .../scala/doric/syntax/AggregationColumns31.scala | 2 +- .../scala/doric/syntax/BooleanColumns31.scala | 2 +- .../scala/doric/syntax/NumericColumns31.scala | 2 +- .../scala/doric/syntax/StringColumns31.scala | 2 +- .../spark_3.2_3.3/scala/doric/syntax/BinaryColumns32.scala | 2 +- .../spark_3.2_3.3/scala/doric/syntax/NumericColumns32.scala | 2 +- 23 files changed, 26 insertions(+), 25 deletions(-) diff --git a/core/src/main/scala/doric/sem/JoinOps.scala b/core/src/main/scala/doric/sem/JoinOps.scala index 61a012137..b30e4ca2d 100644 --- a/core/src/main/scala/doric/sem/JoinOps.scala +++ b/core/src/main/scala/doric/sem/JoinOps.scala @@ -5,7 +5,7 @@ import doric.{DoricColumn, DoricJoinColumn} import org.apache.spark.sql.{DataFrame, Dataset} -private[sem] trait JoinOps { +protected trait JoinOps { implicit 
class DataframeJoinSyntax[A](df: Dataset[A]) { /** diff --git a/core/src/main/scala/doric/syntax/AggregationColumns.scala b/core/src/main/scala/doric/syntax/AggregationColumns.scala index 26adb63b7..7693317eb 100644 --- a/core/src/main/scala/doric/syntax/AggregationColumns.scala +++ b/core/src/main/scala/doric/syntax/AggregationColumns.scala @@ -8,7 +8,7 @@ import doric.Doric import org.apache.spark.sql.{Column, functions => f} import org.apache.spark.sql.catalyst.expressions.aggregate.Sum -private[syntax] trait AggregationColumns { +protected trait AggregationColumns { /** * Aggregate function: returns the sum of all values in the expression. diff --git a/core/src/main/scala/doric/syntax/ArrayColumns.scala b/core/src/main/scala/doric/syntax/ArrayColumns.scala index a2f4368ab..e968d5662 100644 --- a/core/src/main/scala/doric/syntax/ArrayColumns.scala +++ b/core/src/main/scala/doric/syntax/ArrayColumns.scala @@ -33,7 +33,7 @@ protected final case class Zipper[T1, T2, F[_]: CollectionType]( } } -private[syntax] trait ArrayColumns { +protected trait ArrayColumns { /** * Concatenates multiple array columns together into a single column. 
@@ -580,7 +580,8 @@ private[syntax] trait ArrayColumns { * @note * if `start` == 0 an exception will be thrown * @group Array Type - * @see [[org.apache.spark.sql.functions.slice(x:org\.apache\.spark\.sql\.Column,start:org\.apache\.spark\.sql\.Column,length* org.apache.spark.sql.functions.slice]] + * @see org.apache.spark.sql.functions.slice + * @todo scaladoc link (issue #135) */ def slice(start: IntegerColumn, length: IntegerColumn): DoricColumn[F[T]] = (col.elem, start.elem, length.elem) diff --git a/core/src/main/scala/doric/syntax/BinaryColumns.scala b/core/src/main/scala/doric/syntax/BinaryColumns.scala index f50842db2..65184942b 100644 --- a/core/src/main/scala/doric/syntax/BinaryColumns.scala +++ b/core/src/main/scala/doric/syntax/BinaryColumns.scala @@ -6,7 +6,7 @@ import doric.types.{BinaryType, SparkType} import org.apache.spark.sql.{functions => f} -private[syntax] trait BinaryColumns { +protected trait BinaryColumns { /** * Concatenates multiple binary columns together into a single column. diff --git a/core/src/main/scala/doric/syntax/BooleanColumns.scala b/core/src/main/scala/doric/syntax/BooleanColumns.scala index faa20e8f1..19c7154ec 100644 --- a/core/src/main/scala/doric/syntax/BooleanColumns.scala +++ b/core/src/main/scala/doric/syntax/BooleanColumns.scala @@ -5,7 +5,7 @@ import doric.DoricColumn.sparkFunction import org.apache.spark.sql.{functions => f} -private[syntax] trait BooleanColumns { +protected trait BooleanColumns { /** * Inversion of boolean expression, i.e. NOT. 
diff --git a/core/src/main/scala/doric/syntax/CommonColumns.scala b/core/src/main/scala/doric/syntax/CommonColumns.scala index 875e6d9bd..3b742514c 100644 --- a/core/src/main/scala/doric/syntax/CommonColumns.scala +++ b/core/src/main/scala/doric/syntax/CommonColumns.scala @@ -8,7 +8,7 @@ import doric.types.{Casting, SparkType, UnsafeCasting} import org.apache.spark.sql.{Column, functions => f} import org.apache.spark.sql.catalyst.expressions.ArrayRepeat -private[syntax] trait CommonColumns extends ColGetters[NamedDoricColumn] { +protected trait CommonColumns extends ColGetters[NamedDoricColumn] { /** * Returns the first column that is not null, or null if all inputs are null. diff --git a/core/src/main/scala/doric/syntax/ControlStructures.scala b/core/src/main/scala/doric/syntax/ControlStructures.scala index b7338514c..3d110d2fc 100644 --- a/core/src/main/scala/doric/syntax/ControlStructures.scala +++ b/core/src/main/scala/doric/syntax/ControlStructures.scala @@ -3,7 +3,7 @@ package syntax import doric.types.SparkType -private[syntax] trait ControlStructures { +protected trait ControlStructures { /** * Initialize a when builder diff --git a/core/src/main/scala/doric/syntax/DStructs.scala b/core/src/main/scala/doric/syntax/DStructs.scala index 21d1c2540..86096d5a4 100644 --- a/core/src/main/scala/doric/syntax/DStructs.scala +++ b/core/src/main/scala/doric/syntax/DStructs.scala @@ -14,7 +14,7 @@ import shapeless.{::, HList, LabelledGeneric, Witness} import scala.jdk.CollectionConverters._ import scala.language.dynamics -private[syntax] trait DStructs { +protected trait DStructs { /** * Creates a struct with the columns diff --git a/core/src/main/scala/doric/syntax/DateColumns.scala b/core/src/main/scala/doric/syntax/DateColumns.scala index 368f89403..1c2a2fad2 100644 --- a/core/src/main/scala/doric/syntax/DateColumns.scala +++ b/core/src/main/scala/doric/syntax/DateColumns.scala @@ -9,7 +9,7 @@ import java.sql.Date import org.apache.spark.sql.{Column, functions => f} 
import org.apache.spark.sql.catalyst.expressions.{AddMonths, CurrentDate, DateAdd, DateFormatClass, DateSub, MonthsBetween, NextDay, TruncDate, TruncTimestamp} -private[syntax] trait DateColumns { +protected trait DateColumns { /** * Returns the current date at the start of query evaluation as a date column. diff --git a/core/src/main/scala/doric/syntax/LiteralConversions.scala b/core/src/main/scala/doric/syntax/LiteralConversions.scala index 1908626ce..a05e9fe77 100644 --- a/core/src/main/scala/doric/syntax/LiteralConversions.scala +++ b/core/src/main/scala/doric/syntax/LiteralConversions.scala @@ -5,7 +5,7 @@ import doric.sem.Location import doric.types.SparkType.Primitive import doric.types.{LiteralSparkType, SparkType} -private[syntax] trait LiteralConversions { +protected trait LiteralConversions { /** * Creates a literal with the provided value. diff --git a/core/src/main/scala/doric/syntax/MapColumns.scala b/core/src/main/scala/doric/syntax/MapColumns.scala index 9cd371a49..46ec3c308 100644 --- a/core/src/main/scala/doric/syntax/MapColumns.scala +++ b/core/src/main/scala/doric/syntax/MapColumns.scala @@ -8,7 +8,7 @@ import org.apache.spark.sql.{Column, Row, functions => f} import scala.jdk.CollectionConverters._ -private[syntax] trait MapColumns { +protected trait MapColumns { /** * Returns the union of all the given maps. 
diff --git a/core/src/main/scala/doric/syntax/NumericColumns.scala b/core/src/main/scala/doric/syntax/NumericColumns.scala index bf00e179f..5ddf5e4b2 100644 --- a/core/src/main/scala/doric/syntax/NumericColumns.scala +++ b/core/src/main/scala/doric/syntax/NumericColumns.scala @@ -7,7 +7,7 @@ import doric.types.{CollectionType, NumericType} import org.apache.spark.sql.catalyst.expressions.{BRound, FormatNumber, FromUnixTime, Rand, Randn, Round, UnaryMinus} import org.apache.spark.sql.{Column, functions => f} -private[syntax] trait NumericColumns { +protected trait NumericColumns { /** * Returns the current Unix timestamp (in seconds) as a long. diff --git a/core/src/main/scala/doric/syntax/StringColumns.scala b/core/src/main/scala/doric/syntax/StringColumns.scala index 0df95fe09..7685f2f30 100644 --- a/core/src/main/scala/doric/syntax/StringColumns.scala +++ b/core/src/main/scala/doric/syntax/StringColumns.scala @@ -8,7 +8,7 @@ import org.apache.spark.sql.{Column, functions => f} import scala.jdk.CollectionConverters._ -private[syntax] trait StringColumns { +protected trait StringColumns { /** * Concatenate string columns to form a single one diff --git a/core/src/main/scala/doric/syntax/TimestampColumns.scala b/core/src/main/scala/doric/syntax/TimestampColumns.scala index bb0d87550..bcf04935c 100644 --- a/core/src/main/scala/doric/syntax/TimestampColumns.scala +++ b/core/src/main/scala/doric/syntax/TimestampColumns.scala @@ -7,7 +7,7 @@ import doric.types.{SparkType, TimestampType} import org.apache.spark.sql.{Column, functions => f} import org.apache.spark.sql.catalyst.expressions.{FromUTCTimestamp, ToUTCTimestamp} -private[syntax] trait TimestampColumns { +protected trait TimestampColumns { /** * Returns the current timestamp at the start of query evaluation as a timestamp column. 
diff --git a/core/src/main/scala/doric/syntax/package.scala b/core/src/main/scala/doric/syntax/package.scala index b7398689f..82067a96e 100644 --- a/core/src/main/scala/doric/syntax/package.scala +++ b/core/src/main/scala/doric/syntax/package.scala @@ -20,7 +20,7 @@ package object syntax { * @tparam K type of "key" doric column to perform the search * @tparam V type of "value" doric column result */ - @inline private[syntax] def elementAtAbstract[T, K, V]( + @inline def elementAtAbstract[T, K, V]( dc: DoricColumn[T], key: DoricColumn[K] ): DoricColumn[V] = { @@ -39,7 +39,7 @@ package object syntax { * @param dc doric column to be reversed * @tparam T type of doric column (string or array) */ - @inline private[syntax] def reverseAbstract[T]( + @inline def reverseAbstract[T]( dc: DoricColumn[T] ): DoricColumn[T] = dc.elem.map(f.reverse).toDC diff --git a/core/src/main/spark_2.4_3.0_3.1/scala/doric/syntax/BinaryColumns30_31.scala b/core/src/main/spark_2.4_3.0_3.1/scala/doric/syntax/BinaryColumns30_31.scala index 31c996533..530c222d3 100644 --- a/core/src/main/spark_2.4_3.0_3.1/scala/doric/syntax/BinaryColumns30_31.scala +++ b/core/src/main/spark_2.4_3.0_3.1/scala/doric/syntax/BinaryColumns30_31.scala @@ -7,7 +7,7 @@ import doric.types.{BinaryType, SparkType} import org.apache.spark.sql.catalyst.expressions.Decode import org.apache.spark.sql.Column -private[syntax] trait BinaryColumns30_31 { +protected trait BinaryColumns30_31 { implicit class BinaryOperationsSyntax30_31[T: BinaryType: SparkType]( column: DoricColumn[T] diff --git a/core/src/main/spark_2.4_3.0_3.1/scala/doric/syntax/NumericColumns2_31.scala b/core/src/main/spark_2.4_3.0_3.1/scala/doric/syntax/NumericColumns2_31.scala index 64b97cb5e..9868bd658 100644 --- a/core/src/main/spark_2.4_3.0_3.1/scala/doric/syntax/NumericColumns2_31.scala +++ b/core/src/main/spark_2.4_3.0_3.1/scala/doric/syntax/NumericColumns2_31.scala @@ -6,7 +6,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.{functions => 
f} import org.apache.spark.sql.catalyst.expressions.{ShiftLeft, ShiftRight, ShiftRightUnsigned} -private[syntax] trait NumericColumns2_31 { +protected trait NumericColumns2_31 { /** * INTEGRAL OPERATIONS diff --git a/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/AggregationColumns31.scala b/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/AggregationColumns31.scala index f1dc6e724..bf2411464 100644 --- a/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/AggregationColumns31.scala +++ b/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/AggregationColumns31.scala @@ -5,7 +5,7 @@ import doric.types.DoubleC import org.apache.spark.sql.{functions => f} -private[syntax] trait AggregationColumns31 { +protected trait AggregationColumns31 { /** * Aggregate function: returns the approximate `percentile` of the numeric column `col` which diff --git a/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/BooleanColumns31.scala b/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/BooleanColumns31.scala index abfd30f4d..a19802d28 100644 --- a/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/BooleanColumns31.scala +++ b/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/BooleanColumns31.scala @@ -5,7 +5,7 @@ import cats.implicits._ import org.apache.spark.sql.{functions => f} -private[syntax] trait BooleanColumns31 { +protected trait BooleanColumns31 { /** * @group Boolean Type diff --git a/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/NumericColumns31.scala b/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/NumericColumns31.scala index 5749691d6..1f458e31c 100644 --- a/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/NumericColumns31.scala +++ b/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/NumericColumns31.scala @@ -4,7 +4,7 @@ package syntax import doric.types.NumericType import org.apache.spark.sql.{functions => f} -private[syntax] trait NumericColumns31 { +protected trait NumericColumns31 { implicit class NumericOperationsSyntax31[T: NumericType]( 
column: DoricColumn[T] ) { diff --git a/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/StringColumns31.scala b/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/StringColumns31.scala index 5917f59dc..4968680b1 100644 --- a/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/StringColumns31.scala +++ b/core/src/main/spark_3.1_3.2_3.3/scala/doric/syntax/StringColumns31.scala @@ -4,7 +4,7 @@ package syntax import doric.sem.Location import org.apache.spark.sql.{functions => f} -private[syntax] trait StringColumns31 { +protected trait StringColumns31 { /** * Throws an exception with the provided error message. diff --git a/core/src/main/spark_3.2_3.3/scala/doric/syntax/BinaryColumns32.scala b/core/src/main/spark_3.2_3.3/scala/doric/syntax/BinaryColumns32.scala index 4b3231a8e..4e785dc08 100644 --- a/core/src/main/spark_3.2_3.3/scala/doric/syntax/BinaryColumns32.scala +++ b/core/src/main/spark_3.2_3.3/scala/doric/syntax/BinaryColumns32.scala @@ -7,7 +7,7 @@ import doric.types.{BinaryType, SparkType} import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.expressions.StringDecode -private[syntax] trait BinaryColumns32 { +protected trait BinaryColumns32 { implicit class BinaryOperationsSyntax32[T: BinaryType: SparkType]( column: DoricColumn[T] diff --git a/core/src/main/spark_3.2_3.3/scala/doric/syntax/NumericColumns32.scala b/core/src/main/spark_3.2_3.3/scala/doric/syntax/NumericColumns32.scala index eb6504e6e..2b25dbe23 100644 --- a/core/src/main/spark_3.2_3.3/scala/doric/syntax/NumericColumns32.scala +++ b/core/src/main/spark_3.2_3.3/scala/doric/syntax/NumericColumns32.scala @@ -6,7 +6,7 @@ import org.apache.spark.sql.Column import org.apache.spark.sql.{functions => f} import org.apache.spark.sql.catalyst.expressions.{ShiftLeft, ShiftRight, ShiftRightUnsigned} -private[syntax] trait NumericColumns32 { +protected trait NumericColumns32 { /** * INTEGRAL OPERATIONS From a463104dd237a56928a58eb08531d86a393a89fa Mon Sep 17 00:00:00 2001 From: Eduardo Ruiz 
Date: Sat, 22 Apr 2023 19:20:48 +0200 Subject: [PATCH 2/2] feat: [+] issue #235 todo docs and rebase --- .../main/scala/doric/syntax/ArrayColumns.scala | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/doric/syntax/ArrayColumns.scala b/core/src/main/scala/doric/syntax/ArrayColumns.scala index e968d5662..98f20d704 100644 --- a/core/src/main/scala/doric/syntax/ArrayColumns.scala +++ b/core/src/main/scala/doric/syntax/ArrayColumns.scala @@ -460,7 +460,8 @@ protected trait ArrayColumns { * }}} * * @group Array Type - * @see [[org.apache.spark.sql.functions.exists]] + * @see org.apache.spark.sql.functions.exists + * @todo scaladoc link not available for spark 2.4 */ def exists(fun: DoricColumn[T] => BooleanColumn): BooleanColumn = { val xv = x(col.getIndex(0)) @@ -607,12 +608,14 @@ protected trait ArrayColumns { * Merge two given arrays, element-wise, into a single array using a function. * If one array is shorter, nulls are appended at the end to match the length of the longer * array, before applying the function. + * * @example {{{ * df.select(colArray("val1").zipWith(col("val2"), concat(_, _))) * }}} * * @group Array Type - * @see [[org.apache.spark.sql.functions.zip_with]] + * @see org.apache.spark.sql.functions.zip_with + * @todo scaladoc link not available for spark 2.4 */ def zipWith[T2]( col2: DoricColumn[F[T2]] @@ -639,8 +642,8 @@ protected trait ArrayColumns { * The array in the first column is used for keys. * The array in the second column is used for values. * - * @throws RuntimeException if arrays doesn't have the same length. - * @throws RuntimeException if a key is null + * @throws java.lang.RuntimeException if arrays doesn't have the same length. + * or if a key is null * * @group Array Type * @see [[org.apache.spark.sql.functions.map_from_arrays]] @@ -653,8 +656,8 @@ protected trait ArrayColumns { * The array in the first column is used for keys. 
 * The array in the second column is used for values. * - * @throws RuntimeException if arrays doesn't have the same length. - * @throws RuntimeException if a key is null + * @throws java.lang.RuntimeException if arrays don't have the same length + * or if a key is null * * @group Array Type * @see [[mapFromArrays]] @@ -664,7 +667,7 @@ /** * Converts a column containing a StructType into a JSON string with the specified schema. - * @throws java.lang.IllegalArgumentException in the case of an unsupported type. + * @throws java.lang.IllegalArgumentException in the case of an unsupported type. * * @group Array Type * @see org.apache.spark.sql.functions.to_json(e:org\.apache\.spark\.sql\.Column,options:scala\.collection\.immutable\.Map\[java\.lang\.String,java\.lang\.String\]):* org.apache.spark.sql.functions.to_csv