diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala index 9ce1f01056462..cbc214d442064 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala @@ -17,11 +17,12 @@ package org.apache.spark.sql.catalyst.expressions +import com.typesafe.scalalogging.slf4j.Logging + import org.apache.spark.sql.catalyst.trees import org.apache.spark.sql.catalyst.errors.attachTree import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.Logging /** * A bound reference points to a specific slot in the input tuple, allowing the actual value diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala index ff10e198a3cee..b8f810447862f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Row.scala @@ -34,7 +34,7 @@ object Row { def unapplySeq(row: Row): Some[Seq[Any]] = Some(row) /** - * Construct a [[Row]] with the given values. + * This method can be used to construct a [[Row]] with the given values. */ def apply(values: Any*): Row = new GenericRow(values.toArray) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala index 67833664b35ae..4ff5791635f4c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/QueryPlanner.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.catalyst.planning -import org.apache.spark.sql.Logging +import com.typesafe.scalalogging.slf4j.Logging + import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.TreeNode diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index 026692abe067d..b8ae326be6fab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.planning import scala.annotation.tailrec -import org.apache.spark.sql.Logging +import com.typesafe.scalalogging.slf4j.Logging import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 0320682d47ce5..1e8fe098f7c1b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -128,7 +128,7 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy def schema: StructType = StructType.fromAttributes(output) /** Returns the output schema in the tree format. 
*/ - def schemaString: String = schema.schemaString + def schemaString: String = schema.structString /** Prints out the schema in the tree format */ def printSchema(): Unit = println(schemaString) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala index 1076537bc7602..f39bff8c25164 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.catalyst.rules -import org.apache.spark.sql.Logging +import com.typesafe.scalalogging.slf4j.Logging + import org.apache.spark.sql.catalyst.trees.TreeNode abstract class Rule[TreeType <: TreeNode[_]] extends Logging { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala index e32adb76fe146..e70ce66cb745f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package org.apache.spark.sql -package catalyst -package rules +package org.apache.spark.sql.catalyst.rules + +import com.typesafe.scalalogging.slf4j.Logging import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.sideBySide diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/package.scala index d159ecdd5d781..9a28d035a10a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/package.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.catalyst -import org.apache.spark.sql.Logger - /** * A library for easily manipulating trees of operators. Operators that extend TreeNode are * granted the following interface: @@ -35,5 +33,6 @@ import org.apache.spark.sql.Logger */ package object trees { // Since we want tree nodes to be lightweight, we create one logger for all treenode instances. 
- protected val logger = Logger("catalyst.trees") + protected val logger = + com.typesafe.scalalogging.slf4j.Logger(org.slf4j.LoggerFactory.getLogger("catalyst.trees")) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala index 4f7bc23a7412e..e07db00b749c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala @@ -30,7 +30,7 @@ import org.apache.spark.util.Utils /** * */ -object DataType extends RegexParsers { +protected[sql] object DataType extends RegexParsers { protected lazy val primitiveType: Parser[DataType] = "StringType" ^^^ StringType | "FloatType" ^^^ FloatType | @@ -84,6 +84,21 @@ object DataType extends RegexParsers { case Success(result, _) => result case failure: NoSuccess => sys.error(s"Unsupported dataType: $asString, $failure") } + + protected[types] def buildFormattedString( + dataType: DataType, + prefix: String, + builder: StringBuilder): Unit = { + dataType match { + case array: ArrayType => + array.buildFormattedString(prefix, builder) + case struct: StructType => + struct.buildFormattedString(prefix, builder) + case map: MapType => + map.buildFormattedString(prefix, builder) + case _ => + } + } } abstract class DataType { @@ -244,6 +259,7 @@ case object FloatType extends FractionalType { } object ArrayType { + /** Construct a [[ArrayType]] object with the given element type. The `containsNull` is false. */ def apply(elementType: DataType): ArrayType = ArrayType(elementType, false) } @@ -251,15 +267,7 @@ case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataT private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = { builder.append( s"${prefix}-- element: ${elementType.simpleString} (containsNull = ${containsNull})\n") - elementType match { - case array: ArrayType => - array.buildFormattedString(s"$prefix |", builder) - case struct: StructType => - struct.buildFormattedString(s"$prefix |", builder) - case map: MapType => - map.buildFormattedString(s"$prefix |", builder) - case _ => - } + DataType.buildFormattedString(elementType, s"$prefix |", builder) } def simpleString: String = "array" @@ -269,48 +277,41 @@ case class StructField(name: String, dataType: DataType, nullable: Boolean) { private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = { builder.append(s"${prefix}-- ${name}: ${dataType.simpleString} (nullable = ${nullable})\n") - dataType match { - case array: ArrayType => - array.buildFormattedString(s"$prefix |", builder) - case struct: StructType => - struct.buildFormattedString(s"$prefix |", builder) - case map: MapType => - map.buildFormattedString(s"$prefix |", builder) - case _ => - } + DataType.buildFormattedString(dataType, s"$prefix |", builder) } } object StructType { - def fromAttributes(attributes: Seq[Attribute]): StructType = + protected[sql] def fromAttributes(attributes: Seq[Attribute]): StructType = StructType(attributes.map(a => StructField(a.name, a.dataType, a.nullable))) private def validateFields(fields: Seq[StructField]): Boolean = fields.map(field => field.name).distinct.size == fields.size - - def apply[A <: String: ClassTag, B <: DataType: ClassTag](fields: (A, B)*): StructType = - StructType(fields.map(field => StructField(field._1, field._2, true))) - - def apply[A <: String: ClassTag, B <: DataType: 
ClassTag, C <: Boolean: ClassTag]( - fields: (A, B, C)*): StructType = - StructType(fields.map(field => StructField(field._1, field._2, field._3))) } case class StructType(fields: Seq[StructField]) extends DataType { require(StructType.validateFields(fields), "Found fields with the same name.") + /** + * Extracts a [[StructField]] of the given name. If the [[StructType]] object does not + * have a name matching the given name, `null` will be returned. + */ def apply(name: String): StructField = { fields.find(f => f.name == name).orNull } - def apply(names: String*): StructType = { - val nameSet = names.toSet - StructType(fields.filter(f => nameSet.contains(f.name))) + /** + * Returns a [[StructType]] containing [[StructField]]s of the given names. + * Those names which do not have matching fields will be ignored. + */ + def apply(names: Set[String]): StructType = { + StructType(fields.filter(f => names.contains(f.name))) } - def toAttributes = fields.map(f => AttributeReference(f.name, f.dataType, f.nullable)()) + protected[sql] def toAttributes = + fields.map(f => AttributeReference(f.name, f.dataType, f.nullable)()) - def schemaString: String = { + def structString: String = { val builder = new StringBuilder builder.append("root\n") val prefix = " |" @@ -319,7 +320,7 @@ case class StructType(fields: Seq[StructField]) extends DataType { builder.toString() } - def printSchema(): Unit = println(schemaString) + def printStruct(): Unit = println(structString) private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = { fields.foreach(field => field.buildFormattedString(prefix, builder)) @@ -331,26 +332,8 @@ case class StructType(fields: Seq[StructField]) extends DataType { case class MapType(keyType: DataType, valueType: DataType) extends DataType { private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = { builder.append(s"${prefix}-- key: ${keyType.simpleString}\n") - keyType match { - case array: ArrayType => - array.buildFormattedString(s"$prefix |", builder) - case struct: StructType => - struct.buildFormattedString(s"$prefix |", builder) - case map: MapType => - map.buildFormattedString(s"$prefix |", builder) - case _ => - } - - builder.append(s"${prefix}-- value: ${valueType.simpleString}\n") - valueType match { - case array: ArrayType => - array.buildFormattedString(s"$prefix |", builder) - case struct: StructType => - struct.buildFormattedString(s"$prefix |", builder) - case map: MapType => - map.buildFormattedString(s"$prefix |", builder) - case _ => - } + DataType.buildFormattedString(keyType, s"$prefix |", builder) + DataType.buildFormattedString(valueType, s"$prefix |", builder) } def simpleString: String = "map" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/package.scala deleted file mode 100644 index 2099804073c08..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/package.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark - -/** - * Allows the execution of relational queries, including those expressed in SQL using Spark. - * - * Note that this package is located in catalyst instead of in core so that all subprojects can - * inherit the settings from this package object. - */ -package object sql { - - protected[sql] def Logger(name: String) = - com.typesafe.scalalogging.slf4j.Logger(org.slf4j.LoggerFactory.getLogger(name)) - - protected[sql] type Logging = com.typesafe.scalalogging.slf4j.Logging - - type Row = catalyst.expressions.Row - - val Row = catalyst.expressions.Row - - type DataType = catalyst.types.DataType - - val DataType = catalyst.types.DataType - - val NullType = catalyst.types.NullType - - val StringType = catalyst.types.StringType - - val BinaryType = catalyst.types.BinaryType - - val BooleanType = catalyst.types.BooleanType - - val TimestampType = catalyst.types.TimestampType - - val DecimalType = catalyst.types.DecimalType - - val DoubleType = catalyst.types.DoubleType - - val FloatType = catalyst.types.FloatType - - val ByteType = catalyst.types.ByteType - - val IntegerType = catalyst.types.IntegerType - - val LongType = catalyst.types.LongType - - val ShortType = catalyst.types.ShortType - - type ArrayType = catalyst.types.ArrayType - - val ArrayType = catalyst.types.ArrayType - - type MapType = catalyst.types.MapType - - val MapType = catalyst.types.MapType - - type StructType = catalyst.types.StructType - - val StructType = catalyst.types.StructType - - type StructField = catalyst.types.StructField - - val StructField = catalyst.types.StructField -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 355d545cad89e..197942c7b0f66 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -99,7 +99,9 @@ class SQLContext(@transient val sparkContext: SparkContext) /** * Creates a [[SchemaRDD]] from an [[RDD]] by applying a schema to this RDD and using a function * that will be applied to each partition of the RDD to convert RDD records to [[Row]]s. - * + * Similar to `RDD.mapPartitions``, this function can be used to improve performance where there + * is other setup work that can be amortized and used repeatedly for all of the + * elements in a partition. 
* @group userf */ def applySchemaToPartitions[A]( @@ -128,7 +130,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * * @group userf */ - def jsonFile(path: String): SchemaRDD = jsonFile(path, 1.0, None) + def jsonFile(path: String): SchemaRDD = jsonFile(path, 1.0) /** * Loads a JSON file (one object per line) and applies the given schema, @@ -136,15 +138,18 @@ class SQLContext(@transient val sparkContext: SparkContext) * * @group userf */ - def jsonFile(path: String, schema: StructType): SchemaRDD = jsonFile(path, 1.0, Option(schema)) + def jsonFile(path: String, schema: StructType): SchemaRDD = { + val json = sparkContext.textFile(path) + jsonRDD(json, schema) + } /** * :: Experimental :: */ @Experimental - def jsonFile(path: String, samplingRatio: Double, schema: Option[StructType]): SchemaRDD = { + def jsonFile(path: String, samplingRatio: Double): SchemaRDD = { val json = sparkContext.textFile(path) - jsonRDD(json, samplingRatio, schema) + jsonRDD(json, samplingRatio) } /** @@ -154,7 +159,7 @@ class SQLContext(@transient val sparkContext: SparkContext) * * @group userf */ - def jsonRDD(json: RDD[String]): SchemaRDD = jsonRDD(json, 1.0, None) + def jsonRDD(json: RDD[String]): SchemaRDD = jsonRDD(json, 1.0) /** * Loads an RDD[String] storing JSON objects (one object per record) and applies the given schema, @@ -162,22 +167,30 @@ class SQLContext(@transient val sparkContext: SparkContext) * * @group userf */ - def jsonRDD(json: RDD[String], schema: StructType): SchemaRDD = jsonRDD(json, 1.0, Option(schema)) + def jsonRDD(json: RDD[String], schema: StructType): SchemaRDD = { + val appliedSchema = + Option(schema).getOrElse(JsonRDD.nullTypeToStringType(JsonRDD.inferSchema(json, 1.0))) + + applySchemaToPartitions( + json, + appliedSchema, + JsonRDD.jsonStringToRow(appliedSchema, _: Iterator[String])) + } /** * :: Experimental :: */ @Experimental - def jsonRDD(json: RDD[String], samplingRatio: Double, schema: Option[StructType]): SchemaRDD = { - val appliedSchema = - schema.getOrElse(JsonRDD.nullTypeToStringType(JsonRDD.inferSchema(json, samplingRatio))) + def jsonRDD(json: RDD[String], samplingRatio: Double): SchemaRDD = { + val schema = JsonRDD.nullTypeToStringType(JsonRDD.inferSchema(json, samplingRatio)) applySchemaToPartitions( json, - appliedSchema, - JsonRDD.jsonStringToRow(appliedSchema, _: Iterator[String])) + schema, + JsonRDD.jsonStringToRow(schema, _: Iterator[String])) } + /** * :: Experimental :: * Creates an empty parquet file with the schema of class `A`, which can be registered as a table. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala index d60b4eca52ff0..bf1a2a866e58d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDDLike.scala @@ -123,12 +123,21 @@ private[sql] trait SchemaRDDLike { def saveAsTable(tableName: String): Unit = sqlContext.executePlan(InsertIntoCreatedTable(None, tableName, logicalPlan)).toRdd - /** Returns the schema. */ + /** Returns the schema of this SchemaRDD (represented by a [[StructType]]). + * + * @group schema + */ def schema: StructType = queryExecution.analyzed.schema - /** Returns the output schema in the tree format. */ - def schemaString: String = schema.schemaString + /** Returns the schema as a string in the tree format. + * + * @group schema + */ + def schemaString: String = schema.structString - /** Prints out the schema in the tree format. 
*/ + /** Prints out the schema. + * + * @group schema + */ def printSchema(): Unit = println(schemaString) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/package-info.java b/sql/core/src/main/scala/org/apache/spark/sql/package-info.java similarity index 100% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/package-info.java rename to sql/core/src/main/scala/org/apache/spark/sql/package-info.java diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala new file mode 100644 index 0000000000000..4d36f639f4a00 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.apache.spark.annotation.DeveloperApi + +/** + * Allows the execution of relational queries, including those expressed in SQL using Spark. + * + * @groupname dataType Data types + * @groupdesc Spark SQL data types. + * @groupprio dataType -2 + * @groupname row Row + * @groupprio row -1 + */ +package object sql { + + protected[sql] type Logging = com.typesafe.scalalogging.slf4j.Logging + + /** + * :: DeveloperApi :: + * + * Represents one row of output from a relational operator. + * @group row + */ + @DeveloperApi + type Row = catalyst.expressions.Row + + /** + * :: DeveloperApi :: + * + * A [[Row]] object can be constructed by providing field values. Example: + * {{{ + * import org.apache.spark.sql._ + * + * Row(value1, value2, value3, ...) + * }}} + * + * Fields in a [[Row]] object can be extracted in a pattern match. Example: + * {{{ + * import org.apache.spark.sql._ + * + * val pairs = sql("SELECT key, value FROM src").rdd.map { + * case Row(key: Int, value: String) => + * key -> value + * } + * }}} + * @group row + */ + @DeveloperApi + val Row = catalyst.expressions.Row + + /** + * :: DeveloperApi :: + * + * The base type of all Spark SQL data types. + * + * @group dataType + */ + @DeveloperApi + type DataType = catalyst.types.DataType + + /** + * :: DeveloperApi :: + * + * The data type representing `String` values + * + * @group dataType + */ + @DeveloperApi + val StringType = catalyst.types.StringType + + /** + * :: DeveloperApi :: + * + * The data type representing `Array[Byte]` values. + * + * @group dataType + */ + @DeveloperApi + val BinaryType = catalyst.types.BinaryType + + /** + * :: DeveloperApi :: + * + * The data type representing `Boolean` values. 
+ * + *@group dataType + */ + @DeveloperApi + val BooleanType = catalyst.types.BooleanType + + /** + * :: DeveloperApi :: + * + * The data type representing `java.sql.Timestamp` values + * + * @group dataType + */ + @DeveloperApi + val TimestampType = catalyst.types.TimestampType + + /** + * :: DeveloperApi :: + * + * The data type representing `scala.math.BigDecimal` values. + * + * @group dataType + */ + @DeveloperApi + val DecimalType = catalyst.types.DecimalType + + /** + * :: DeveloperApi :: + * + * The data type representing `Double` values. + * + * @group dataType + */ + @DeveloperApi + val DoubleType = catalyst.types.DoubleType + + /** + * :: DeveloperApi :: + * + * The data type representing `Float` values. + * + * @group dataType + */ + @DeveloperApi + val FloatType = catalyst.types.FloatType + + /** + * :: DeveloperApi :: + * + * The data type representing `Byte` values + * + * @group dataType + */ + @DeveloperApi + val ByteType = catalyst.types.ByteType + + /** + * :: DeveloperApi :: + * + * The data type representing `Int` values. + * + * @group dataType + */ + @DeveloperApi + val IntegerType = catalyst.types.IntegerType + + /** + * :: DeveloperApi :: + * + * The data type representing `Long` values. + * + * @group dataType + */ + @DeveloperApi + val LongType = catalyst.types.LongType + + /** + * :: DeveloperApi :: + * + * The data type representing `Short` values. + * + * @group dataType + */ + @DeveloperApi + val ShortType = catalyst.types.ShortType + + /** + * :: DeveloperApi :: + * + * The data type representing `Seq`s. + * An [[ArrayType]] object comprises two fields, `elementType: [[DataType]]` and + * `containsNull: Boolean`. The field of `elementType` is used to specify the type of + * array elements. The field of `containsNull` is used to specify if the array can have + * any `null` value. + * + * @group dataType + */ + @DeveloperApi + type ArrayType = catalyst.types.ArrayType + + /** + * :: DeveloperApi :: + * + * An [[ArrayType]] object can be constructed with two ways, + * {{{ + * ArrayType(elementType: DataType, containsNull: Boolean) + * }}} and + * {{{ + * ArrayType(elementType: DataType) + * }}} + * For `ArrayType(elementType)`, the field of `containsNull` is set to `false`. + * + * @group dataType + */ + @DeveloperApi + val ArrayType = catalyst.types.ArrayType + + /** + * :: DeveloperApi :: + * + * The data type representing `Map`s. A [[MapType]] object comprises two fields, + * `keyType: [[DataType]]` and `valueType: [[DataType]]`. + * The field of `keyType` is used to specify the type of keys in the map. + * The field of `valueType` is used to specify the type of values in the map. + * For a [[MapType]] column, keys and values should not contain any `null` value. + * + * @group dataType + */ + @DeveloperApi + type MapType = catalyst.types.MapType + + /** + * :: DeveloperApi :: + * + * A [[MapType]] can be constructed by + * {{{ + * MapType(keyType: DataType, valueType: DataType) + * }}} + * + * @group dataType + */ + @DeveloperApi + val MapType = catalyst.types.MapType + + /** + * :: DeveloperApi :: + * + * The data type representing [[Row]]s. + * A [[StructType]] object comprises a [[Seq]] of [[StructField]]s. + * + * @group dataType + */ + @DeveloperApi + type StructType = catalyst.types.StructType + + /** + * :: DeveloperApi :: + * + * A [[StructType]] object can be constructed by + * {{{ + * StructType(fields: Seq[StructField]) + * }}} + * For a [[StructType]] object, one or multiple [[StructField]]s can be extracted by names. 
+ * If multiple [[StructField]]s are extracted, a [[StructType]] object will be returned. + * If a provided name does not have a matching field, it will be ignored. For the case + * of extracting a single StructField, a `null` will be returned. + * Example: + * {{{ + * import org.apache.spark.sql._ + * + * val struct = + * StructType( + * StructField("a", IntegerType, true) :: + * StructField("b", LongType, false) :: + * StructField("c", BooleanType, false) :: Nil) + * + * // Extract a single StructField. + * val singleField = struct("b") + * // singleField: StructField = StructField(b,LongType,false) + * + * // This struct does not have a field called "d". null will be returned. + * val nonExisting = struct("d") + * // nonExisting: StructField = null + * + * // Extract multiple StructFields. Field names are provided in a set. + * // A StructType object will be returned. + * val twoFields = struct(Set("b", "c")) + * // twoFields: StructType = + * // StructType(List(StructField(b,LongType,false), StructField(c,BooleanType,false))) + * + * // Those names do not have matching fields will be ignored. + * // For the case shown below, "d" will be ignored and + * // it is treated as struct(Set("b", "c")). + * val ignoreNonExisting = struct(Set("b", "c", "d")) + * // ignoreNonExisting: StructType = + * // StructType(List(StructField(b,LongType,false), StructField(c,BooleanType,false))) + * }}} + * + * A [[Row]] object is used as a value of the StructType. + * Example: + * {{{ + * import org.apache.spark.sql._ + * + * val innerStruct = + * StructType( + * StructField("f1", IntegerType, true) :: + * StructField("f2", LongType, false) :: + * StructField("f3", BooleanType, false) :: Nil) + * + * val struct = StructType( + * StructField("a", innerStruct, true) :: Nil) + * + * // Create a Row with the schema defined by struct + * val row = Row(Row(1, 2, true)) + * // row: Row = [[1,2,true]] + * }}} + * + * @group dataType + */ + @DeveloperApi + val StructType = catalyst.types.StructType + + /** + * :: DeveloperApi :: + * + * A [[StructField]] object represents a field in a [[StructType]] object. + * A [[StructField]] object comprises three fields, `name: [[String]]`, `dataType: [[DataType]]`, + * and `nullable: Boolean`. The field of `name` is the name of a `StructField`. The field of + * `dataType` specifies the data type of a `StructField`. + * The field of `nullable` specifies if values of a `StructField` can contain `null` values. + * + * @group dataType + */ + @DeveloperApi + type StructField = catalyst.types.StructField + + /** + * :: DeveloperApi :: + * + * A [[StructField]] object can be constructed by + * {{{ + * StructField(name: String, dataType: DataType, nullable: Boolean) + * }}} + * + * @group dataType + */ + @DeveloperApi + val StructField = catalyst.types.StructField +}
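For reference, below is a minimal usage sketch of the public API surface this patch settles on: constructing a StructType from StructFields exposed through the new org.apache.spark.sql package object, extracting fields by name and by Set[String], the structString/printStruct renames on StructType, and the jsonRDD overload that takes an explicit schema instead of an Option. It is illustrative only; the SparkContext setup, sample JSON record, and field names are assumptions made for demonstration and are not part of the patch.

// Illustrative sketch only: exercises the API as changed by this patch.
// The local SparkContext, sample JSON string, and field names are assumed for the example.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql._

object SchemaApiExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("schema-api-example").setMaster("local"))
    val sqlContext = new SQLContext(sc)

    // Data types and StructField/StructType come from the new org.apache.spark.sql package object.
    val struct = StructType(
      StructField("a", IntegerType, true) ::
      StructField("b", LongType, false) ::
      StructField("c", BooleanType, false) :: Nil)

    // Extract a single field by name; a name with no matching field yields null.
    val singleField: StructField = struct("b")

    // Extract several fields at once with the new apply(names: Set[String]); unknown names are ignored.
    val twoFields: StructType = struct(Set("b", "c", "d"))

    // StructType now exposes structString/printStruct rather than schemaString/printSchema.
    println(struct.structString)

    // Apply an explicit schema to a JSON RDD; jsonRDD(json, schema) no longer takes an Option[StructType].
    val json = sc.parallelize("""{"a":1,"b":2,"c":true}""" :: Nil)
    val schemaRDD = sqlContext.jsonRDD(json, struct)

    // SchemaRDD keeps schemaString/printSchema, which now delegate to StructType.structString.
    schemaRDD.printSchema()

    sc.stop()
  }
}

As the sketch suggests, the user-facing names on SchemaRDD (schema, schemaString, printSchema) are unchanged; only StructType's own tree-formatting methods are renamed, and the Option-based jsonFile/jsonRDD variants are replaced by separate overloads for an explicit schema versus a sampling ratio.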