[HUDI-4100] CTAS failed to clean up when given an illegal MANAGED table definition #5588
@@ -763,4 +763,22 @@ class TestCreateTable extends HoodieSparkSqlTestBase
     assertResult(true)(shown.contains("COMMENT 'This is a simple hudi table'"))
   }
 }
+
+  test("Test CTAS using an illegal definition -- a COW table with compaction enabled.") {
+    val tableName = generateTableName
+    checkExceptionContain(
+      s"""
+         | create table $tableName using hudi
+         | tblproperties(
+         |  primaryKey = 'id',
+         |  type = 'cow',
+         |  hoodie.compact.inline='true'
+         | )
+         | AS
+         | select 1 as id, 'a1' as name, 10 as price, 1000 as ts
+         |""".stripMargin)("Compaction is not supported on a CopyOnWrite table")
+    val dbPath = spark.sessionState.catalog.getDatabaseMetadata("default").locationUri.getPath
+    val tablePath = s"${dbPath}/${tableName}"
+    assertResult(false)(existsPath(tablePath))
+  }
 }
@@ -38,6 +38,7 @@ import org.apache.spark.sql.hudi.{HoodieSqlCommonUtils, ProvidesHoodieConfig}
 import org.apache.spark.sql.types.{StructField, StructType}
 import org.apache.spark.sql.{Dataset, SaveMode, SparkSession, _}
 
+import java.net.URI
 import java.util
 import scala.collection.JavaConverters.{mapAsJavaMapConverter, mapAsScalaMapConverter}
 
@@ -50,7 +51,9 @@ class HoodieCatalog extends DelegatingCatalogExtension
 
   override def stageCreate(ident: Identifier, schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): StagedTable = {
     if (sparkAdapter.isHoodieTable(properties)) {
-      HoodieStagedTable(ident, this, schema, partitions, properties, TableCreationMode.STAGE_CREATE)
+      val locUriAndTableType = deduceTableLocationURIAndTableType(ident, properties)
+      HoodieStagedTable(ident, locUriAndTableType, this, schema, partitions,
+        properties, TableCreationMode.STAGE_CREATE)
     } else {
       BasicStagedTable(
         ident,
@@ -61,7 +64,9 @@ class HoodieCatalog extends DelegatingCatalogExtension
 
   override def stageReplace(ident: Identifier, schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): StagedTable = {
     if (sparkAdapter.isHoodieTable(properties)) {
-      HoodieStagedTable(ident, this, schema, partitions, properties, TableCreationMode.STAGE_REPLACE)
+      val locUriAndTableType = deduceTableLocationURIAndTableType(ident, properties)
+      HoodieStagedTable(ident, locUriAndTableType, this, schema, partitions,
+        properties, TableCreationMode.STAGE_REPLACE)
     } else {
       super.dropTable(ident)
       BasicStagedTable(
@@ -76,8 +81,9 @@ class HoodieCatalog extends DelegatingCatalogExtension
                                   partitions: Array[Transform],
                                   properties: util.Map[String, String]): StagedTable = {
     if (sparkAdapter.isHoodieTable(properties)) {
-      HoodieStagedTable(
-        ident, this, schema, partitions, properties, TableCreationMode.CREATE_OR_REPLACE)
+      val locUriAndTableType = deduceTableLocationURIAndTableType(ident, properties)
+      HoodieStagedTable(ident, locUriAndTableType, this, schema, partitions,
+        properties, TableCreationMode.CREATE_OR_REPLACE)
     } else {
       try super.dropTable(ident) catch {
         case _: NoSuchTableException => // ignore the exception
@@ -112,7 +118,9 @@ class HoodieCatalog extends DelegatingCatalogExtension
                            schema: StructType,
                            partitions: Array[Transform],
                            properties: util.Map[String, String]): Table = {
-    createHoodieTable(ident, schema, partitions, properties, Map.empty, Option.empty, TableCreationMode.CREATE)
+    val locUriAndTableType = deduceTableLocationURIAndTableType(ident, properties)
+    createHoodieTable(ident, schema, locUriAndTableType, partitions, properties,
+      Map.empty, Option.empty, TableCreationMode.CREATE)
   }
 
   override def tableExists(ident: Identifier): Boolean = super.tableExists(ident)
@@ -193,8 +201,30 @@ class HoodieCatalog extends DelegatingCatalogExtension
     loadTable(ident)
   }
 
+  private def deduceTableLocationURIAndTableType(
+      ident: Identifier, properties: util.Map[String, String]): (URI, CatalogTableType) = {
+    val locOpt = if (isPathIdentifier(ident)) {
+      Option(ident.name())
+    } else {
+      Option(properties.get("location"))
+    }
+    val tableType = if (locOpt.nonEmpty) {
+      CatalogTableType.EXTERNAL
+    } else {
+      CatalogTableType.MANAGED
+    }
+    val locUriOpt = locOpt.map(CatalogUtils.stringToURI)
+    val tableIdent = ident.asTableIdentifier
+    val existingTableOpt = getExistingTableIfExists(tableIdent)
+    val locURI = locUriOpt
+      .orElse(existingTableOpt.flatMap(_.storage.locationUri))
+      .getOrElse(spark.sessionState.catalog.defaultTablePath(tableIdent))
+    (locURI, tableType)
+  }
+
   def createHoodieTable(ident: Identifier,
                         schema: StructType,
+                        locUriAndTableType: (URI, CatalogTableType),
                         partitions: Array[Transform],
                         allTableProperties: util.Map[String, String],
                         writeOptions: Map[String, String],
@@ -206,29 +236,17 @@ class HoodieCatalog extends DelegatingCatalogExtension
     val newPartitionColumns = partitionColumns
     val newBucketSpec = maybeBucketSpec
 
-    val isByPath = isPathIdentifier(ident)
-
-    val location = if (isByPath) Option(ident.name()) else Option(allTableProperties.get("location"))
-    val id = ident.asTableIdentifier
-
-    val locUriOpt = location.map(CatalogUtils.stringToURI)
-    val existingTableOpt = getExistingTableIfExists(id)
-    val loc = locUriOpt
-      .orElse(existingTableOpt.flatMap(_.storage.locationUri))
-      .getOrElse(spark.sessionState.catalog.defaultTablePath(id))
     val storage = DataSource.buildStorageFormatFromOptions(writeOptions.--(needFilterProps))
-      .copy(locationUri = Option(loc))
-    val tableType =
-      if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
+      .copy(locationUri = Option(locUriAndTableType._1))
     val commentOpt = Option(allTableProperties.get("comment"))
 
     val tablePropertiesNew = new util.HashMap[String, String](allTableProperties)
     // put path to table properties.
-    tablePropertiesNew.put("path", loc.getPath)
+    tablePropertiesNew.put("path", locUriAndTableType._1.getPath)
 
     val tableDesc = new CatalogTable(
-      identifier = id,
-      tableType = tableType,
+      identifier = ident.asTableIdentifier,
+      tableType = locUriAndTableType._2,
       storage = storage,
       schema = newSchema,
       provider = Option("hudi"),

Review discussion on this hunk:

Reviewer: can we call deduceTableLocationURIAndTableType in ...

Author: You mean -- HoodieStageTable invokes ...? I'd prefer to invoke ...

Author: For the first version of this PR, I construct a whole ...
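The HoodieStagedTable changes that actually perform the cleanup are not part of this excerpt; per the PR title, the deduced (URI, CatalogTableType) pair is what lets a failed CTAS remove the half-written MANAGED table directory while leaving an EXTERNAL location untouched. The following is only a rough sketch of that idea under stated assumptions: the class name StagedHoodieTableCleanup, the abortStagedChanges method, and the explicit Configuration parameter are illustrative, not Hudi's actual code.

import java.net.URI

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.catalyst.catalog.CatalogTableType

// Hypothetical cleanup hook: if the staged CTAS is aborted, a MANAGED table's
// deduced default location is deleted, while an EXTERNAL table's
// user-supplied location is left alone.
class StagedHoodieTableCleanup(locUriAndTableType: (URI, CatalogTableType),
                               hadoopConf: Configuration) {

  def abortStagedChanges(): Unit = {
    val (locUri, tableType) = locUriAndTableType
    if (tableType == CatalogTableType.MANAGED) {
      val path = new Path(locUri)
      val fs = path.getFileSystem(hadoopConf)
      if (fs.exists(path)) {
        // Recursively delete the half-written table directory.
        fs.delete(path, true)
      }
    }
  }
}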
Discussion:

Reviewer: sorry I do not see the check logic for the invalid definition in this PR, am I missing it?

Author: You mean this line? https://github.com/apache/hudi/pull/5588/files/c06740700f51f557b77ffc19068e36a4cb19864a#diff-52cf34bac0cb6cc5b9c0f46c5fd190ac202a1db824182b59137d83ae0db38c89R779 -- "Compaction is not supported on a CopyOnWrite table"

Reviewer: no, what I mean is the check logic in hudi

Author: The current HoodieSparkCopyOnWriteTable#scheduleCompaction and #compact throw
new HoodieNotSupportedException("Compaction is not supported on a CopyOnWrite table");
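For context, the check the author points at is an unconditional rejection in the copy-on-write table implementation. A minimal Scala paraphrase of that behaviour follows; the class and method signatures are simplified stand-ins, not Hudi's actual code.

// Simplified paraphrase of the behaviour described above: on a copy-on-write
// table, any attempt to schedule or run compaction fails immediately.
class HoodieNotSupportedException(msg: String) extends RuntimeException(msg)

class CopyOnWriteTableSketch {
  def scheduleCompaction(instantTime: String): Nothing =
    throw new HoodieNotSupportedException("Compaction is not supported on a CopyOnWrite table")

  def compact(compactionInstantTime: String): Nothing =
    throw new HoodieNotSupportedException("Compaction is not supported on a CopyOnWrite table")
}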
Reviewer: @jinxing64 but how did the create table with hoodie.compact.inline=true call HoodieSparkCopyOnWriteTable#scheduleCompaction or #compact? ps: you can add my wechat (xleesf) to communicate offline

Author: Sorry for late reply ~ I applied on "wechat" :)
Stacktrace as below:
Author: In my understanding of the compaction triggering mechanism: when inline compaction is enabled, HoodieSparkTable#scheduleCompaction is invoked from HoodieWriteClient, and ScheduleCompactionActionExecutor decides the triggering timing. The exception above ("Compaction is not supported") is thrown from HoodieSparkCopyOnWriteTable.
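A minimal sketch of the flow described above, assuming a simplified write-client/table split; the names HoodieTableSketch, CopyOnWriteSketch, WriteClientSketch and postCommit are illustrative only, not the real Hudi classes.

// Illustrative sketch of the triggering path described above, not the actual
// HoodieWriteClient implementation.
trait HoodieTableSketch {
  // The COW variant throws; a MOR variant would return a compaction plan instant.
  def scheduleCompaction(instantTime: String): Option[String]
}

class CopyOnWriteSketch extends HoodieTableSketch {
  override def scheduleCompaction(instantTime: String): Option[String] =
    throw new UnsupportedOperationException("Compaction is not supported on a CopyOnWrite table")
}

class WriteClientSketch(table: HoodieTableSketch, config: Map[String, String]) {
  // Called after each commit: when inline compaction is enabled, the client
  // asks the table to schedule (and then execute) a compaction, which is where
  // the exception surfaces for a COW table created with hoodie.compact.inline='true'.
  def postCommit(instantTime: String): Unit = {
    if (config.getOrElse("hoodie.compact.inline", "false").toBoolean) {
      table.scheduleCompaction(instantTime).foreach { compactionInstant =>
        // a real client would now execute the compaction plan for this instant
        println(s"compacting at $compactionInstant")
      }
    }
  }
}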