Skip to content

Commit

Permalink
Use repartition(1) instead of coalesce(1) for OPTIMIZE
Browse files Browse the repository at this point in the history
Signed-off-by: Eunjin Song <sezruby@gmail.com>
  • Loading branch information
sezruby authored and jbguerraz committed Jul 6, 2022
1 parent 54a3871 commit 4561420
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,13 @@ class OptimizeExecutor(
approxNumFiles,
zOrderByColumns)
} else {
input.coalesce(numPartitions = 1)
// Choose how the input is collapsed into a single partition. The internal
// session flag DELTA_OPTIMIZE_USE_REPARTITON selects repartition(1); when it
// is off, the original coalesce(1) path is kept.
// NOTE(review): per Spark's Dataset API docs, repartition inserts a shuffle
// while coalesce does not — confirm the intended trade-off with the caller.
val sessionConf = sparkSession.sessionState.conf
if (sessionConf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_USE_REPARTITON)) {
  input.repartition(numPartitions = 1)
} else {
  input.coalesce(numPartitions = 1)
}
}

val partitionDesc = partition.toSeq.map(entry => entry._1 + "=" + entry._2).mkString(",")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,12 @@ trait DeltaSQLConfBase {
.transform(_.toLowerCase(Locale.ROOT))
.createWithDefault("table")

// Internal boolean flag (key suffix "optimize.repartition.enabled"; the full
// key prefix is added by buildConf and is not visible here) controlling how
// OPTIMIZE collapses its input into one partition: when true the executor
// calls repartition(1), otherwise it calls coalesce(1). Defaults to false,
// i.e. the coalesce(1) behaviour.
// NOTE(review): the identifier is misspelled ("REPARTITON", missing an "I").
// It is kept as-is because OptimizeExecutor references it by this name.
val DELTA_OPTIMIZE_USE_REPARTITON =
buildConf("optimize.repartition.enabled")
.internal()
.doc("Use repartition(1) instead of coalesce(1) to merge small files.")
.booleanConf
.createWithDefault(false)

val DELTA_ALTER_TABLE_CHANGE_COLUMN_CHECK_EXPRESSIONS =
buildConf("alterTable.changeColumn.checkExpressions")
Expand Down

0 comments on commit 4561420

Please sign in to comment.