From f1d79ef2757d7fb46c9e3debee7cd1f65afb6ed4 Mon Sep 17 00:00:00 2001
From: luxu1-ms <68044595+luxu1-ms@users.noreply.github.com>
Date: Fri, 17 Feb 2023 13:55:57 -0800
Subject: [PATCH] Fix scala test (#213)

* fix df column metadata for assert comparison

* add method to compare schema

---------

Co-authored-by: shivsood
---
 .../src/main/scala/ConnectorTestUtils.scala |  7 +++++++
 .../src/main/scala/MasterInstanceTest.scala | 12 ++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/test/scala_test/src/main/scala/ConnectorTestUtils.scala b/test/scala_test/src/main/scala/ConnectorTestUtils.scala
index c56fcedd..7846b86e 100755
--- a/test/scala_test/src/main/scala/ConnectorTestUtils.scala
+++ b/test/scala_test/src/main/scala/ConnectorTestUtils.scala
@@ -318,6 +318,13 @@ class Connector_TestUtils(spark:SparkSession,
     runWithReliableConnector = flag
   }
 
+  // Newer Spark JDBC reads automatically fill in the column metadata field.
+  // This method empties the metadata of the result schema's columns before comparing.
+  def compareSchemaIgnoreColsMetadata(df_schema:StructType, result_schema:StructType) : Boolean = {
+    val result_schema_cleaned = StructType(result_schema.map(_.copy(metadata = Metadata.empty)))
+    df_schema == result_schema_cleaned
+  }
+
   object dfTableUtility {
     val table_cols = List (
       ("RecordTime", IntegerType, "int", true),
diff --git a/test/scala_test/src/main/scala/MasterInstanceTest.scala b/test/scala_test/src/main/scala/MasterInstanceTest.scala
index 7b8ab04f..c0609c3c 100755
--- a/test/scala_test/src/main/scala/MasterInstanceTest.scala
+++ b/test/scala_test/src/main/scala/MasterInstanceTest.scala
@@ -67,7 +67,7 @@ class MasterInstanceTest(testUtils:Connector_TestUtils) {
     val df = testUtils.create_toy_df()
     testUtils.df_write(df, SaveMode.Overwrite, table_name)
     var result = testUtils.df_read(table_name)
-    assert(df.schema == result.schema)
+    assert(testUtils.compareSchemaIgnoreColsMetadata(df.schema, result.schema))
     var query = s"(select * from ${table_name} where entry_number > 100) emp_alias"
     result = testUtils.df_read(query)
     assert(result.count == 2)
@@ -230,7 +230,7 @@ class MasterInstanceTest(testUtils:Connector_TestUtils) {
     val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
     testUtils.df_write(df, SaveMode.Overwrite, table_name)
     var result = testUtils.df_read(table_name)
-    assert(df.schema == result.schema)
+    assert(testUtils.compareSchemaIgnoreColsMetadata(df.schema, result.schema))
     assert(df.count == result.count)
     val df_rows = df.orderBy(asc("entry_number")).collect()
     val result_rows = result.orderBy(asc("entry_number")).collect()
@@ -499,7 +499,7 @@ class MasterInstanceTest(testUtils:Connector_TestUtils) {
     testUtils.df_write(df, SaveMode.Append, table_name, tabLock = "false")
 
     var result = testUtils.df_read(table_name)
-    assert(df.schema == result.schema)
+    assert(testUtils.compareSchemaIgnoreColsMetadata(df.schema, result.schema))
     assert(result.count == 2*df.count())
     log.info("test_gci_tabLock_write : Exit")
     testUtils.drop_test_table(table_name)
@@ -513,7 +513,7 @@ class MasterInstanceTest(testUtils:Connector_TestUtils) {
     testUtils.df_write(df, SaveMode.Append, table_name, encrypt = "true", trustServerCertificate = "true")
 
     var result = testUtils.df_read(table_name)
-    assert(df.schema == result.schema)
+    assert(testUtils.compareSchemaIgnoreColsMetadata(df.schema, result.schema))
     assert(result.count == 2*df.count())
     log.info("test_gci_secureURL_write : Exit")
     testUtils.drop_test_table(table_name)
@@ -576,9 +576,9 @@ class MasterInstanceTest(testUtils:Connector_TestUtils) {
     Await.result(futureB, Duration.Inf)
 
     var result1 = testUtils.df_read(table_name1)
-    assert(df.schema == result1.schema)
+    assert(testUtils.compareSchemaIgnoreColsMetadata(df.schema, result1.schema))
     var result2 = testUtils.df_read(table_name2)
-    assert(df.schema == result2.schema)
+    assert(testUtils.compareSchemaIgnoreColsMetadata(df.schema, result2.schema))
     log.info("test_write_parallel : Exit")
     testUtils.drop_test_table(table_name1)
     testUtils.drop_test_table(table_name2)
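
A minimal, self-contained sketch (separate from the patch above) of the behavior this change works around: Spark's JDBC reader attaches metadata to the columns it reads back, so plain StructType equality fails even when names and types match. The "scale" metadata key and the SchemaCompareSketch object below are illustrative assumptions, not part of the connector's test suite.

import org.apache.spark.sql.types._

object SchemaCompareSketch {
  // Same logic as the patch's compareSchemaIgnoreColsMetadata:
  // strip per-column metadata from the read-back schema, then compare.
  def compareSchemaIgnoreColsMetadata(dfSchema: StructType, resultSchema: StructType): Boolean = {
    val cleaned = StructType(resultSchema.map(_.copy(metadata = Metadata.empty)))
    dfSchema == cleaned
  }

  def main(args: Array[String]): Unit = {
    // Schema as written by the test DataFrame (no metadata attached)
    val written = StructType(Seq(StructField("entry_number", IntegerType)))

    // Simulated read-back schema: same field, but with JDBC-populated metadata
    // ("scale" is an assumed example of a key the reader might fill in)
    val jdbcMeta = new MetadataBuilder().putLong("scale", 0).build()
    val readBack = StructType(Seq(StructField("entry_number", IntegerType, nullable = true, jdbcMeta)))

    assert(written != readBack)                                // naive equality fails on metadata
    assert(compareSchemaIgnoreColsMetadata(written, readBack)) // helper ignores metadata
  }
}

StructField is a case class whose equals includes its metadata, which is why the tests' original `df.schema == result.schema` assertions broke; copying each field with Metadata.empty restores the equality the tests actually mean to check.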