Skip to content

Commit

Permalink
Type mapping rewriting for JDBC integration (#505)
Browse files Browse the repository at this point in the history
* Refactor JDBC test files and enhance readJdbc.kt for better Dataframe creation

* Updated test cases for JDBC connection and enhanced the logic for reading JDBC connections in readJdbc.kt.

In this commit, the ports in the JDBC URLs in imdbTest.kt file were modified to ensure correct connection to database servers. A new URL constant was added to facilitate reading tables with schema.

A new test case was added for better testing of reading tables with schemas.

In readJdbc.kt, the logic for fetching and converting data from the ResultSet was revised for better dataframe creation.

* Updated: Removed method convertDataFromResultSet from DbType class

* Add catalogue and limit parameters to readAllSqlTables"

* Update DbType methods and Postgres tests.

* Handle null values in DataFrame filtering and mark code for removal

* Removed redundant DataFrame read methods in readJdbc.kt

* Changes include handling potential duplication of column names in SQL queries

* Added support for empty tables

* Refactor type conversions for better handling of unsupported SQL types

* Fixed the test with new types generation

* Added simple Test for JSON column handling

* disabled integration tests

* Ensure non-nullability of "amount" in Sale, refactor SQL to KType mapping

* Fix nullability check in readJdbc.kt

* Improve the handling of nullability of database columns when creating DataFrame.

* Updated tests for an imdb database

* Updated tests for an mysql/mariadb database

* Updated tests for a postgresql database

* Updated tests for a sqlite database

* Updated tests for a sqlite database

* Formatted

* Formattedd
  • Loading branch information
zaleslaw authored Dec 14, 2023
1 parent 0f7fdce commit f3256c5
Show file tree
Hide file tree
Showing 15 changed files with 831 additions and 794 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import kotlin.reflect.KType

/**
* The `DbType` class represents a database type used for reading dataframe from the database.
Expand All @@ -22,19 +23,10 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
*/
public abstract val driverClassName: String

/**
* Converts the data from the given [ResultSet] into the specified [TableColumnMetadata] type.
*
* @param rs The [ResultSet] containing the data to be converted.
* @param tableColumnMetadata The [TableColumnMetadata] representing the target type of the conversion.
* @return The converted data as an instance of [Any].
*/
public abstract fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any?

/**
* Returns a [ColumnSchema] produced from [tableColumnMetadata].
*/
public abstract fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema
public abstract fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema?

/**
* Checks if the given table name is a system table for the specified database type.
Expand All @@ -52,4 +44,12 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
* @return the TableMetadata object representing the table metadata.
*/
public abstract fun buildTableMetadata(tables: ResultSet): TableMetadata

/**
* Converts SQL data type to a Kotlin data type.
*
* @param [tableColumnMetadata] The metadata of the table column.
* @return The corresponding Kotlin data type, or null if no mapping is found.
*/
public abstract fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType?
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
Expand All @@ -21,71 +20,8 @@ public object H2 : DbType("h2") {
override val driverClassName: String
get() = "org.h2.Driver"

override fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any? {
val name = tableColumnMetadata.name
return when (tableColumnMetadata.sqlTypeName) {
"CHARACTER", "CHAR" -> rs.getString(name)
"CHARACTER VARYING", "CHAR VARYING", "VARCHAR" -> rs.getString(name)
"CHARACTER LARGE OBJECT", "CHAR LARGE OBJECT", "CLOB" -> rs.getString(name)
"MEDIUMTEXT" -> rs.getString(name)
"VARCHAR_IGNORECASE" -> rs.getString(name)
"BINARY" -> rs.getBytes(name)
"BINARY VARYING", "VARBINARY" -> rs.getBytes(name)
"BINARY LARGE OBJECT", "BLOB" -> rs.getBytes(name)
"BOOLEAN" -> rs.getBoolean(name)
"TINYINT" -> rs.getByte(name)
"SMALLINT" -> rs.getShort(name)
"INTEGER", "INT" -> rs.getInt(name)
"BIGINT" -> rs.getLong(name)
"NUMERIC", "DECIMAL", "DEC" -> rs.getFloat(name) // not a BigDecimal
"REAL", "FLOAT" -> rs.getFloat(name)
"DOUBLE PRECISION" -> rs.getDouble(name)
"DECFLOAT" -> rs.getDouble(name)
"DATE" -> rs.getDate(name).toString()
"TIME" -> rs.getTime(name).toString()
"TIME WITH TIME ZONE" -> rs.getTime(name).toString()
"TIMESTAMP" -> rs.getTimestamp(name).toString()
"TIMESTAMP WITH TIME ZONE" -> rs.getTimestamp(name).toString()
"INTERVAL" -> rs.getObject(name).toString()
"JAVA_OBJECT" -> rs.getObject(name)
"ENUM" -> rs.getString(name)
"JSON" -> rs.getString(name) // TODO: https://github.com/Kotlin/dataframe/issues/462
"UUID" -> rs.getString(name)
else -> throw IllegalArgumentException("Unsupported H2 type: ${tableColumnMetadata.sqlTypeName}")
}
}

override fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema {
return when (tableColumnMetadata.sqlTypeName) {
"CHARACTER", "CHAR" -> ColumnSchema.Value(typeOf<String>())
"CHARACTER VARYING", "CHAR VARYING", "VARCHAR" -> ColumnSchema.Value(typeOf<String>())
"CHARACTER LARGE OBJECT", "CHAR LARGE OBJECT", "CLOB" -> ColumnSchema.Value(typeOf<String>())
"MEDIUMTEXT" -> ColumnSchema.Value(typeOf<String>())
"VARCHAR_IGNORECASE" -> ColumnSchema.Value(typeOf<String>())
"BINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"BINARY VARYING", "VARBINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"BINARY LARGE OBJECT", "BLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"BOOLEAN" -> ColumnSchema.Value(typeOf<Boolean>())
"TINYINT" -> ColumnSchema.Value(typeOf<Byte>())
"SMALLINT" -> ColumnSchema.Value(typeOf<Short>())
"INTEGER", "INT" -> ColumnSchema.Value(typeOf<Int>())
"BIGINT" -> ColumnSchema.Value(typeOf<Long>())
"NUMERIC", "DECIMAL", "DEC" -> ColumnSchema.Value(typeOf<Float>())
"REAL", "FLOAT" -> ColumnSchema.Value(typeOf<Float>())
"DOUBLE PRECISION" -> ColumnSchema.Value(typeOf<Double>())
"DECFLOAT" -> ColumnSchema.Value(typeOf<Double>())
"DATE" -> ColumnSchema.Value(typeOf<String>())
"TIME" -> ColumnSchema.Value(typeOf<String>())
"TIME WITH TIME ZONE" -> ColumnSchema.Value(typeOf<String>())
"TIMESTAMP" -> ColumnSchema.Value(typeOf<String>())
"TIMESTAMP WITH TIME ZONE" -> ColumnSchema.Value(typeOf<String>())
"INTERVAL" -> ColumnSchema.Value(typeOf<String>())
"JAVA_OBJECT" -> ColumnSchema.Value(typeOf<Any>())
"ENUM" -> ColumnSchema.Value(typeOf<String>())
"JSON" -> ColumnSchema.Value(typeOf<String>()) // TODO: https://github.com/Kotlin/dataframe/issues/462
"UUID" -> ColumnSchema.Value(typeOf<String>())
else -> throw IllegalArgumentException("Unsupported H2 type: ${tableColumnMetadata.sqlTypeName} for column ${tableColumnMetadata.name}")
}
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
return null
}

override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
Expand All @@ -99,4 +35,8 @@ public object H2 : DbType("h2") {
tables.getString("TABLE_SCHEM"),
tables.getString("TABLE_CAT"))
}

override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
return null
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
Expand All @@ -16,73 +17,8 @@ public object MariaDb : DbType("mariadb") {
override val driverClassName: String
get() = "org.mariadb.jdbc.Driver"

override fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any? {
val name = tableColumnMetadata.name
return when (tableColumnMetadata.sqlTypeName) {
"BIT" -> rs.getBytes(name)
"TINYINT" -> rs.getInt(name)
"SMALLINT" -> rs.getInt(name)
"MEDIUMINT"-> rs.getInt(name)
"MEDIUMINT UNSIGNED" -> rs.getLong(name)
"INTEGER", "INT" -> rs.getInt(name)
"INTEGER UNSIGNED", "INT UNSIGNED" -> rs.getLong(name)
"BIGINT" -> rs.getLong(name)
"FLOAT" -> rs.getFloat(name)
"DOUBLE" -> rs.getDouble(name)
"DECIMAL" -> rs.getBigDecimal(name)
"DATE" -> rs.getDate(name).toString()
"DATETIME" -> rs.getTimestamp(name).toString()
"TIMESTAMP" -> rs.getTimestamp(name).toString()
"TIME"-> rs.getTime(name).toString()
"YEAR" -> rs.getDate(name).toString()
"VARCHAR", "CHAR" -> rs.getString(name)
"BINARY" -> rs.getBytes(name)
"VARBINARY" -> rs.getBytes(name)
"TINYBLOB"-> rs.getBytes(name)
"BLOB"-> rs.getBytes(name)
"MEDIUMBLOB" -> rs.getBytes(name)
"LONGBLOB" -> rs.getBytes(name)
"TEXT" -> rs.getString(name)
"MEDIUMTEXT" -> rs.getString(name)
"LONGTEXT" -> rs.getString(name)
"ENUM" -> rs.getString(name)
"SET" -> rs.getString(name)
else -> throw IllegalArgumentException("Unsupported MariaDB type: ${tableColumnMetadata.sqlTypeName}")
}
}

override fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema {
return when (tableColumnMetadata.sqlTypeName) {
"BIT" -> ColumnSchema.Value(typeOf<ByteArray>())
"TINYINT" -> ColumnSchema.Value(typeOf<Int>())
"SMALLINT" -> ColumnSchema.Value(typeOf<Int>())
"MEDIUMINT"-> ColumnSchema.Value(typeOf<Int>())
"MEDIUMINT UNSIGNED" -> ColumnSchema.Value(typeOf<Long>())
"INTEGER", "INT" -> ColumnSchema.Value(typeOf<Int>())
"INTEGER UNSIGNED", "INT UNSIGNED" -> ColumnSchema.Value(typeOf<Long>())
"BIGINT" -> ColumnSchema.Value(typeOf<Long>())
"FLOAT" -> ColumnSchema.Value(typeOf<Float>())
"DOUBLE" -> ColumnSchema.Value(typeOf<Double>())
"DECIMAL" -> ColumnSchema.Value(typeOf<Double>())
"DATE" -> ColumnSchema.Value(typeOf<String>())
"DATETIME" -> ColumnSchema.Value(typeOf<String>())
"TIMESTAMP" -> ColumnSchema.Value(typeOf<String>())
"TIME"-> ColumnSchema.Value(typeOf<String>())
"YEAR" -> ColumnSchema.Value(typeOf<String>())
"VARCHAR", "CHAR" -> ColumnSchema.Value(typeOf<String>())
"BINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"VARBINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"TINYBLOB"-> ColumnSchema.Value(typeOf<ByteArray>())
"BLOB"-> ColumnSchema.Value(typeOf<ByteArray>())
"MEDIUMBLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"LONGBLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"TEXT" -> ColumnSchema.Value(typeOf<String>())
"MEDIUMTEXT" -> ColumnSchema.Value(typeOf<String>())
"LONGTEXT" -> ColumnSchema.Value(typeOf<String>())
"ENUM" -> ColumnSchema.Value(typeOf<String>())
"SET" -> ColumnSchema.Value(typeOf<String>())
else -> throw IllegalArgumentException("Unsupported MariaDB type: ${tableColumnMetadata.sqlTypeName} for column ${tableColumnMetadata.name}")
}
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
return null
}

override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
Expand All @@ -95,4 +31,8 @@ public object MariaDb : DbType("mariadb") {
tables.getString("table_schem"),
tables.getString("table_cat"))
}

override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
return null
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import java.util.Locale
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import kotlin.reflect.KType
import kotlin.reflect.typeOf

/**
Expand All @@ -19,79 +20,8 @@ public object MySql : DbType("mysql") {
override val driverClassName: String
get() = "com.mysql.jdbc.Driver"

override fun convertDataFromResultSet(rs: ResultSet, tableColumnMetadata: TableColumnMetadata): Any? {
val name = tableColumnMetadata.name
return when (tableColumnMetadata.sqlTypeName) {
"BIT" -> rs.getBytes(name)
"TINYINT" -> rs.getInt(name)
"SMALLINT" -> rs.getInt(name)
"MEDIUMINT"-> rs.getInt(name)
"MEDIUMINT UNSIGNED" -> rs.getLong(name)
"INTEGER", "INT" -> rs.getInt(name)
"INTEGER UNSIGNED", "INT UNSIGNED" -> rs.getLong(name)
"BIGINT" -> rs.getLong(name)
"FLOAT" -> rs.getFloat(name)
"DOUBLE" -> rs.getDouble(name)
"DECIMAL" -> rs.getBigDecimal(name)
"DATE" -> rs.getDate(name).toString()
"DATETIME" -> rs.getTimestamp(name).toString()
"TIMESTAMP" -> rs.getTimestamp(name).toString()
"TIME"-> rs.getTime(name).toString()
"YEAR" -> rs.getDate(name).toString()
"VARCHAR", "CHAR" -> rs.getString(name)
"BINARY" -> rs.getBytes(name)
"VARBINARY" -> rs.getBytes(name)
"TINYBLOB"-> rs.getBytes(name)
"BLOB"-> rs.getBytes(name)
"MEDIUMBLOB" -> rs.getBytes(name)
"LONGBLOB" -> rs.getBytes(name)
"TEXT" -> rs.getString(name)
"MEDIUMTEXT" -> rs.getString(name)
"LONGTEXT" -> rs.getString(name)
"ENUM" -> rs.getString(name)
"SET" -> rs.getString(name)
// special mysql types
"JSON" -> rs.getString(name) // TODO: https://github.com/Kotlin/dataframe/issues/462
"GEOMETRY" -> rs.getBytes(name)
else -> throw IllegalArgumentException("Unsupported MySQL type: ${tableColumnMetadata.sqlTypeName}")
}
}

override fun toColumnSchema(tableColumnMetadata: TableColumnMetadata): ColumnSchema {
return when (tableColumnMetadata.sqlTypeName) {
"BIT" -> ColumnSchema.Value(typeOf<ByteArray>())
"TINYINT" -> ColumnSchema.Value(typeOf<Int>())
"SMALLINT" -> ColumnSchema.Value(typeOf<Int>())
"MEDIUMINT"-> ColumnSchema.Value(typeOf<Int>())
"MEDIUMINT UNSIGNED" -> ColumnSchema.Value(typeOf<Long>())
"INTEGER", "INT" -> ColumnSchema.Value(typeOf<Int>())
"INTEGER UNSIGNED", "INT UNSIGNED" -> ColumnSchema.Value(typeOf<Long>())
"BIGINT" -> ColumnSchema.Value(typeOf<Long>())
"FLOAT" -> ColumnSchema.Value(typeOf<Float>())
"DOUBLE" -> ColumnSchema.Value(typeOf<Double>())
"DECIMAL" -> ColumnSchema.Value(typeOf<Double>())
"DATE" -> ColumnSchema.Value(typeOf<String>())
"DATETIME" -> ColumnSchema.Value(typeOf<String>())
"TIMESTAMP" -> ColumnSchema.Value(typeOf<String>())
"TIME"-> ColumnSchema.Value(typeOf<String>())
"YEAR" -> ColumnSchema.Value(typeOf<String>())
"VARCHAR", "CHAR" -> ColumnSchema.Value(typeOf<String>())
"BINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"VARBINARY" -> ColumnSchema.Value(typeOf<ByteArray>())
"TINYBLOB"-> ColumnSchema.Value(typeOf<ByteArray>())
"BLOB"-> ColumnSchema.Value(typeOf<ByteArray>())
"MEDIUMBLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"LONGBLOB" -> ColumnSchema.Value(typeOf<ByteArray>())
"TEXT" -> ColumnSchema.Value(typeOf<String>())
"MEDIUMTEXT" -> ColumnSchema.Value(typeOf<String>())
"LONGTEXT" -> ColumnSchema.Value(typeOf<String>())
"ENUM" -> ColumnSchema.Value(typeOf<String>())
"SET" -> ColumnSchema.Value(typeOf<String>())
// special mysql types
"JSON" -> ColumnSchema.Value(typeOf<ColumnGroup<DataRow<String>>>()) // TODO: https://github.com/Kotlin/dataframe/issues/462
"GEOMETRY" -> ColumnSchema.Value(typeOf<ByteArray>())
else -> throw IllegalArgumentException("Unsupported MySQL type: ${tableColumnMetadata.sqlTypeName} for column ${tableColumnMetadata.name}")
}
override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? {
return null
}

override fun isSystemTable(tableMetadata: TableMetadata): Boolean {
Expand All @@ -116,4 +46,8 @@ public object MySql : DbType("mysql") {
tables.getString("table_schem"),
tables.getString("table_cat"))
}

override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? {
return null
}
}
Loading

0 comments on commit f3256c5

Please sign in to comment.