[SPARK-46395][CORE] Assign Spark configs to groups for use in documentation #44755

Closed
@@ -150,7 +150,7 @@ private[spark] class TypedConfigBuilder[T](
def createOptional: OptionalConfigEntry[T] = {
val entry = new OptionalConfigEntry[T](parent.key, parent._prependedKey,
parent._prependSeparator, parent._alternatives, converter, stringConverter, parent._doc,
parent._public, parent._version)
parent._public, parent._documentationGroups, parent._version)
parent._onCreate.foreach(_(entry))
entry
}
@@ -166,7 +166,8 @@ private[spark] class TypedConfigBuilder[T](
val transformedDefault = converter(stringConverter(default))
val entry = new ConfigEntryWithDefault[T](parent.key, parent._prependedKey,
parent._prependSeparator, parent._alternatives, transformedDefault, converter,
stringConverter, parent._doc, parent._public, parent._version)
stringConverter, parent._doc, parent._public, parent._documentationGroups,
parent._version)
parent._onCreate.foreach(_ (entry))
entry
}
@@ -176,7 +177,7 @@ private[spark] class TypedConfigBuilder[T](
def createWithDefaultFunction(defaultFunc: () => T): ConfigEntry[T] = {
val entry = new ConfigEntryWithDefaultFunction[T](parent.key, parent._prependedKey,
parent._prependSeparator, parent._alternatives, defaultFunc, converter, stringConverter,
parent._doc, parent._public, parent._version)
parent._doc, parent._public, parent._documentationGroups, parent._version)
parent._onCreate.foreach(_ (entry))
entry
}
@@ -188,7 +189,7 @@ private[spark] class TypedConfigBuilder[T](
def createWithDefaultString(default: String): ConfigEntry[T] = {
val entry = new ConfigEntryWithDefaultString[T](parent.key, parent._prependedKey,
parent._prependSeparator, parent._alternatives, default, converter, stringConverter,
parent._doc, parent._public, parent._version)
parent._doc, parent._public, parent._documentationGroups, parent._version)
parent._onCreate.foreach(_(entry))
entry
}
@@ -207,6 +208,7 @@ private[spark] case class ConfigBuilder(key: String) {
private[config] var _prependedKey: Option[String] = None
private[config] var _prependSeparator: String = ""
private[config] var _public = true
private[config] var _documentationGroups = Set.empty[String]
private[config] var _doc = ""
private[config] var _version = ""
private[config] var _onCreate: Option[ConfigEntry[_] => Unit] = None
@@ -217,6 +219,12 @@ private[spark] case class ConfigBuilder(key: String) {
this
}

def withDocumentationGroup(group: String): ConfigBuilder = {
require(group.matches("[a-z0-9-]+"))
_documentationGroups = _documentationGroups + group
this
}

def doc(s: String): ConfigBuilder = {
_doc = s
this
@@ -283,7 +291,7 @@ private[spark] case class ConfigBuilder(key: String) {

def fallbackConf[T](fallback: ConfigEntry[T]): ConfigEntry[T] = {
val entry = new FallbackConfigEntry(key, _prependedKey, _prependSeparator, _alternatives, _doc,
_public, _version, fallback)
_public, _documentationGroups, _version, fallback)
_onCreate.foreach(_(entry))
entry
}
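
For illustration, a config definition tagged through the new builder method might look like the sketch below. The key, group name, doc text, and default value are hypothetical and are not part of this PR; note that the group name has to match the [a-z0-9-]+ pattern enforced by the require() in withDocumentationGroup.

// Hypothetical config definition, for illustration only (not in this PR).
private[spark] val EXAMPLE_SHUFFLE_TRACKING =
  ConfigBuilder("spark.example.shuffle.tracking.enabled")
    .doc("Illustrative config showing how a documentation group is attached.")
    .version("4.0.0")
    .withDocumentationGroup("shuffle") // must match [a-z0-9-]+
    .booleanConf
    .createWithDefault(false)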
@@ -68,6 +68,8 @@ The followings are best practices of naming configs for some common cases:
* @param doc the documentation for the configuration
* @param isPublic if this configuration is public to the user. If it's `false`, this
* configuration is only used internally and we should not expose it to users.
* @param documentationGroups groups that are used to organize related configs for display in our
* documentation.
* @param version the spark version when the configuration was released.
* @tparam T the value type
*/
@@ -80,6 +82,7 @@ private[spark] abstract class ConfigEntry[T] (
val stringConverter: T => String,
val doc: String,
val isPublic: Boolean,
val documentationGroups: Set[String],
val version: String) {

import ConfigEntry._
@@ -120,6 +123,7 @@ private class ConfigEntryWithDefault[T] (
stringConverter: T => String,
doc: String,
isPublic: Boolean,
documentationGroups: Set[String],
version: String)
extends ConfigEntry(
key,
@@ -130,6 +134,7 @@ private class ConfigEntryWithDefault[T] (
stringConverter,
doc,
isPublic,
documentationGroups,
version
) {

@@ -152,6 +157,7 @@ private class ConfigEntryWithDefaultFunction[T] (
stringConverter: T => String,
doc: String,
isPublic: Boolean,
documentationGroups: Set[String],
version: String)
extends ConfigEntry(
key,
@@ -162,6 +168,7 @@ private class ConfigEntryWithDefaultFunction[T] (
stringConverter,
doc,
isPublic,
documentationGroups,
version
) {

@@ -184,6 +191,7 @@ private class ConfigEntryWithDefaultString[T] (
stringConverter: T => String,
doc: String,
isPublic: Boolean,
documentationGroups: Set[String],
version: String)
extends ConfigEntry(
key,
@@ -194,6 +202,7 @@ private class ConfigEntryWithDefaultString[T] (
stringConverter,
doc,
isPublic,
documentationGroups,
version
) {

@@ -220,6 +229,7 @@ private[spark] class OptionalConfigEntry[T](
val rawStringConverter: T => String,
doc: String,
isPublic: Boolean,
documentationGroups: Set[String],
version: String)
extends ConfigEntry[Option[T]](
key,
@@ -230,6 +240,7 @@ private[spark] class OptionalConfigEntry[T](
v => v.map(rawStringConverter).orNull,
doc,
isPublic,
documentationGroups,
version
) {

@@ -250,6 +261,7 @@ private[spark] class FallbackConfigEntry[T] (
alternatives: List[String],
doc: String,
isPublic: Boolean,
documentationGroups: Set[String],
version: String,
val fallback: ConfigEntry[T])
extends ConfigEntry[T](
@@ -261,6 +273,7 @@ private[spark] class FallbackConfigEntry[T] (
fallback.stringConverter,
doc,
isPublic,
documentationGroups,
version
) {

@@ -5993,10 +5993,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
settings.synchronized { settings.asScala.toMap }

/**
* Return all the configuration definitions that have been defined in [[SQLConf]]. Each
* definition contains key, defaultValue and doc.
* Return all the public configuration definitions that have been defined in [[SQLConf]].
*/
def getAllDefinedConfs: Seq[(String, String, String, String)] = {
def getAllDefinedConfs: Seq[(String, String, String, String, Set[String])] = {
Contributor:

This does change a public API's type signature. I'm not sure it's super important or widely used, but given that the stated goal of the PR is to improve the docs, do we need this part? Or could a new internal API be introduced if we need this functionality?

Contributor Author:

I think this is only used by us, and I also think it's the right place to make a change judging from the docstring. We also have a rare chance to "get away" with breaking public APIs due to the upcoming 4.0 release.

But it is indeed a public API. Do you prefer I just create a new API? It will duplicate this existing API but include the new documentation groups as well. I was thinking to also have this return a Seq of a case class with descriptive attribute names, so we're not calling ._1 and the like everywhere.
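
A sketch of the case-class-shaped return type floated above might look like the following; the name DefinedConf and its fields are hypothetical and are not introduced by this PR.

// Hypothetical return type, illustrative only (not part of this PR).
case class DefinedConf(
    key: String,
    displayValue: String,
    doc: String,
    version: String,
    documentationGroups: Set[String])

// getAllDefinedConfs could then return Seq[DefinedConf], letting call sites
// use conf.key and conf.documentationGroups instead of ._1 and ._5.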

loadDefinedConfs()
getConfigEntries().asScala.filter(_.isPublic).map { entry =>
val displayValue =
@@ -6005,7 +6004,10 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
// e.g. `<undefined>` instead of `null`
// e.g. `<value of spark.buffer.size>` instead of `65536`
Option(getConfString(entry.key, null)).getOrElse(entry.defaultValueString)
(entry.key, displayValue, entry.doc, entry.version)
val staticOrRuntime =
if (SQLConf.isStaticConfigKey(entry.key)) "__sql_static" else "__sql_runtime"
val documentationGroups = entry.documentationGroups + staticOrRuntime
(entry.key, displayValue, entry.doc, entry.version, documentationGroups)
}.toSeq
}
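
As an illustration of how a documentation generator might consume the new tuple element, the sketch below buckets public config keys by documentation group. It assumes access to an active SparkSession named spark and is not part of this PR.

// Illustrative only: bucket public config keys by documentation group.
// Assumes a SparkSession named `spark`; not part of this PR.
val configsByGroup: Map[String, Seq[String]] =
  spark.sessionState.conf.getAllDefinedConfs
    .flatMap { case (key, _, _, _, groups) => groups.map(g => g -> key) }
    .groupBy { case (group, _) => group }
    .map { case (group, pairs) => group -> pairs.map(_._2) }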

@@ -73,18 +73,23 @@ private[sql] object PythonSQLUtils extends Logging {
FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray
}

private def listAllSQLConfigs(): Seq[(String, String, String, String)] = {
private def listAllSQLConfigs(): Seq[(String, String, String, String, Set[String])] = {
val conf = new SQLConf()
conf.getAllDefinedConfs
}

def listRuntimeSQLConfigs(): Array[(String, String, String, String)] = {
// Py4J doesn't seem to translate Seq well, so we convert to an Array.
listAllSQLConfigs().filterNot(p => SQLConf.isStaticConfigKey(p._1)).toArray
listAllSQLConfigs()
.filterNot(p => SQLConf.isStaticConfigKey(p._1))
.map(c => (c._1, c._2, c._3, c._4))
.toArray
}

def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
listAllSQLConfigs().filter(p => SQLConf.isStaticConfigKey(p._1)).toArray
listAllSQLConfigs()
.filter(p => SQLConf.isStaticConfigKey(p._1))
.map(c => (c._1, c._2, c._3, c._4))
.toArray
}

def isTimestampNTZPreferred: Boolean =
@@ -139,8 +139,8 @@ case class SetCommand(kv: Option[(String, Option[String])])
// SQLConf of the sparkSession.
case Some(("-v", None)) =>
val runFunc = (sparkSession: SparkSession) => {
sparkSession.sessionState.conf.getAllDefinedConfs.sorted.map {
case (key, defaultValue, doc, version) =>
sparkSession.sessionState.conf.getAllDefinedConfs.sortBy(_._1).map {
case (key, defaultValue, doc, version, _) =>
Row(
key,
Option(defaultValue).getOrElse("<undefined>"),
@@ -372,8 +372,8 @@ class SQLConfSuite extends QueryTest with SharedSparkSession {
assert(spark.conf.get(fallback.key, LZO.lowerCaseName()) === LZO.lowerCaseName())

val displayValue = spark.sessionState.conf.getAllDefinedConfs
.find { case (key, _, _, _) => key == fallback.key }
.map { case (_, v, _, _) => v }
.find { case (key, _, _, _, _) => key == fallback.key }
.map { case (_, v, _, _, _) => v }
.get
assert(displayValue === fallback.defaultValueString)

@@ -384,8 +384,8 @@ class SQLConfSuite extends QueryTest with SharedSparkSession {
assert(spark.conf.get(fallback.key) === LZO.lowerCaseName())

val newDisplayValue = spark.sessionState.conf.getAllDefinedConfs
.find { case (key, _, _, _) => key == fallback.key }
.map { case (_, v, _, _) => v }
.find { case (key, _, _, _, _) => key == fallback.key }
.map { case (_, v, _, _, _) => v }
.get
assert(newDisplayValue === LZO.lowerCaseName())

@@ -497,7 +497,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession {

test("SPARK-34454: configs from the legacy namespace should be internal") {
val nonInternalLegacyConfigs = spark.sessionState.conf.getAllDefinedConfs
.filter { case (key, _, _, _) => key.contains("spark.sql.legacy.") }
.filter { case (key, _, _, _, _) => key.contains("spark.sql.legacy.") }
assert(nonInternalLegacyConfigs.isEmpty,
s"""
|Non internal legacy SQL configs: