Skip to content

Commit

Permalink
Set the schema case sensitivity to false for Hive
Browse files Browse the repository at this point in the history
In the translation of views, when joining tables
that have the same column name, irrespective of
case, make sure that the resulting relation
uses unique names for its columns,
in order to avoid the situation where the
generated SQL statement contains ambiguous column names.
  • Loading branch information
findinpath committed Feb 28, 2022
1 parent 0f55d25 commit 1baab42
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ public boolean shouldConvertRaggedUnionTypesToVarying() {
return true;
}

/**
 * Reports whether schema (column) names are matched case-sensitively.
 *
 * <p>Always returns {@code false}, i.e. the schema is treated as case-insensitive.
 * NOTE(review): presumably this is what makes columns that differ only by case
 * (e.g. {@code some_id} vs {@code SOME_ID}) get unique names when relations are joined,
 * so the generated SQL has no ambiguous column references — confirm against the
 * contract of the overridden method.
 *
 * @return {@code false}
 */
@Override
public boolean isSchemaCaseSensitive() {
return false;
}

/**
 * Builds a nullable type for the given SQL type name.
 *
 * @param typeFactory factory used to create the SQL type and its nullable variant
 * @param typeName SQL type name to create
 * @return the type created for {@code typeName} with nullability set to {@code true}
 */
private RelDataType nullableType(RelDataTypeFactory typeFactory, SqlTypeName typeName) {
  final RelDataType baseType = typeFactory.createSqlType(typeName);
  return typeFactory.createTypeWithNullability(baseType, true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.runtime.CalciteContextException;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;
Expand Down Expand Up @@ -618,6 +619,20 @@ public void testCastToDecimalDefault() {
assertEquals(generated, expected);
}

/**
 * Checks the SQL produced for {@code default.view_column_name_case_insensitive}: the
 * joined sub-select must expose {@code some_id} and {@code SOME_ID} under distinct names
 * ({@code some_id} and {@code SOME_ID0}) so the outer query is unambiguous.
 */
@Test
public void testSelectColumnNameCaseInsensitive() {
  // Translate the view and run it through the validator before rendering it.
  SqlNode viewNode = viewToSqlNode("default", "view_column_name_case_insensitive");
  converter.getSqlValidator().validate(viewNode);
  String actualSql = nodeToStr(viewNode);

  String expectedSql = "SELECT \"some_id\"\n"
      + "FROM (SELECT \"duplicate_column_name_a\".\"some_id\", \"t\".\"SOME_ID\" AS \"SOME_ID0\"\n"
      + "FROM \"hive\".\"default\".\"duplicate_column_name_a\"\n"
      + "LEFT JOIN (SELECT TRIM(\"some_id\") AS \"SOME_ID\", CAST(TRIM(\"some_id\") AS VARCHAR(10485760)) AS \"$f1\"\n"
      + "FROM \"hive\".\"default\".\"duplicate_column_name_b\") AS \"t\" ON \"duplicate_column_name_a\".\"some_id\" = \"t\".\"$f1\") AS \"t0\"\n"
      + "WHERE \"t0\".\"some_id\" <> ''";

  assertEquals(actualSql, expectedSql);
}

/**
 * Converts the given SQL text with {@code converter} and renders the resulting
 * relational plan as a string.
 *
 * @param sql SQL text to convert
 * @return string form of the converted plan, as produced by {@link RelOptUtil#toString}
 */
private String relToString(String sql) {
  final RelNode plan = converter.convertSql(sql);
  return RelOptUtil.toString(plan);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,11 +197,19 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException {
driver.run(
"CREATE TABLE IF NOT EXISTS nested_union(foo uniontype<int, double, struct<a:int, b:uniontype<int, double>>>)");

driver.run("CREATE TABLE IF NOT EXISTS duplicate_column_name_a (some_id string)");
driver.run("CREATE TABLE IF NOT EXISTS duplicate_column_name_b (some_id string)");
driver.run("CREATE VIEW IF NOT EXISTS view_column_name_case_insensitive AS\n"
+ " SELECT a.some_id FROM duplicate_column_name_a a\n"
+ " LEFT JOIN ( SELECT trim(some_id) AS SOME_ID FROM duplicate_column_name_b) b ON a.some_id = b.some_id\n"
+ " WHERE a.some_id != ''");

testHive.databases = ImmutableList.of(
new TestHive.DB("test", ImmutableList.of("tableOne", "tableTwo", "tableOneView")),
new TestHive.DB("default",
ImmutableList.of("bar", "complex", "foo", "foo_view", "null_check_view", "null_check_wrapper",
"schema_evolve", "view_schema_evolve", "view_schema_evolve_wrapper", "union_table", "nested_union")),
"schema_evolve", "view_schema_evolve", "view_schema_evolve_wrapper", "union_table", "nested_union",
"duplicate_column_name_a", "duplicate_column_name_b", "view_column_name_case_insensitive")),
new TestHive.DB("fuzzy_union",
ImmutableList.of("tableA", "tableB", "tableC", "union_view", "union_view_with_more_than_two_tables",
"union_view_with_alias", "union_view_single_branch_evolved",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -727,4 +727,17 @@ public void testDeduplicateUdf() {
List<SparkUDFInfo> udfJars = coralSpark.getSparkUDFInfoList();
assertEquals(1, udfJars.size());
}

/**
 * Checks the Spark SQL generated for
 * {@code duplicate_column_name.view_column_name_case_insensitive}: the joined sub-select
 * must expose {@code some_id} and {@code SOME_ID} under distinct names
 * ({@code some_id} and {@code SOME_ID0}).
 */
@Test
public void testSelectColumnNameCaseInsensitive() {
  RelNode viewRel = TestUtils.toRelNode("duplicate_column_name", "view_column_name_case_insensitive");
  String actualSql = CoralSpark.create(viewRel).getSparkSql();

  String expectedSql = String.join("\n", "SELECT some_id",
      "FROM (SELECT tablea.some_id, t.SOME_ID SOME_ID0", "FROM duplicate_column_name.tablea",
      "LEFT JOIN (SELECT TRIM(some_id) SOME_ID, CAST(TRIM(some_id) AS STRING) $f1",
      "FROM duplicate_column_name.tableb) t ON tablea.some_id = t.$f1) t0",
      "WHERE t0.some_id <> ''");

  assertEquals(actualSql, expectedSql);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ public static void initializeViews(HiveConf conf) throws HiveException, MetaExce
run(driver, String.join("\n", "", "CREATE VIEW IF NOT EXISTS named_struct_view", "AS",
"SELECT named_struct('abc', 123, 'def', 'xyz') AS named_struc", "FROM bar"));

run(driver, String.join("\n", "", "CREATE DATABASE IF NOT EXISTS duplicate_column_name"));
run(driver, "CREATE TABLE duplicate_column_name.tableA (some_id string)");
run(driver, "CREATE TABLE duplicate_column_name.tableB (some_id string)");
run(driver, "CREATE VIEW IF NOT EXISTS duplicate_column_name.view_column_name_case_insensitive AS "
+ "SELECT a.some_id FROM duplicate_column_name.tableA a LEFT JOIN (SELECT trim(some_id) AS SOME_ID FROM duplicate_column_name.tableB) b ON a.some_id = b.some_id WHERE a.some_id != ''");

// Views and tables used in FuzzyUnionViewTest
run(driver, String.join("\n", "", "CREATE DATABASE IF NOT EXISTS fuzzy_union"));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,14 @@ public Object[][] viewTestCasesProvider() {
+ "FROM \"test\".\"table_ints_strings\"" },

{ "test", "cast_decimal_view", "SELECT CAST(\"a\" AS DECIMAL(6, 2)) AS \"casted_decimal\"\n"
+ "FROM \"test\".\"table_ints_strings\"" } };
+ "FROM \"test\".\"table_ints_strings\"" },

{ "test", "view_column_name_case_insensitive", "SELECT \"some_id\"\n"
+ "FROM (SELECT \"duplicate_column_name_a\".\"some_id\" AS \"some_id\", \"t\".\"SOME_ID\" AS \"SOME_ID0\"\n"
+ "FROM \"test\".\"duplicate_column_name_a\"\n"
+ "LEFT JOIN (SELECT TRIM(\"some_id\") AS \"SOME_ID\", CAST(TRIM(\"some_id\") AS VARCHAR(65536)) AS \"$f1\"\n"
+ "FROM \"test\".\"duplicate_column_name_b\") AS \"t\" ON \"duplicate_column_name_a\".\"some_id\" = \"t\".\"$f1\") AS \"t0\"\n"
+ "WHERE \"t0\".\"some_id\" <> ''" } };
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,11 @@ public static void initializeViews(HiveConf conf) throws HiveException, MetaExce
run(driver,
"CREATE VIEW IF NOT EXISTS test.view_with_transform_column_name_reset AS SELECT struct_col AS structCol FROM (SELECT * FROM test.viewA UNION ALL SELECT * FROM test.viewB) X");
run(driver, "ALTER TABLE test.tableT CHANGE COLUMN structCol structCol struct<a:int, b:string>");

run(driver, "CREATE TABLE test.duplicate_column_name_a (some_id string)");
run(driver, "CREATE TABLE test.duplicate_column_name_b (some_id string)");
run(driver, "CREATE VIEW IF NOT EXISTS test.view_column_name_case_insensitive AS \n"
+ "SELECT a.some_id FROM test.duplicate_column_name_a a LEFT JOIN ( SELECT trim(some_id) AS SOME_ID FROM test.duplicate_column_name_b) b ON a.some_id = b.some_id WHERE a.some_id != ''");
}

public static RelNode convertView(String db, String view) {
Expand Down

0 comments on commit 1baab42

Please sign in to comment.