Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add infer types parameter in DSL functions #579

Merged
merged 1 commit into from
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ public class AddDsl<T>(@PublishedApi internal val df: DataFrame<T>) : ColumnsCon
noinline expression: RowExpression<T, R>
): Boolean = add(df.mapToColumn(name, infer, expression))

public inline fun <reified R> expr(noinline expression: RowExpression<T, R>): DataColumn<R> {
return df.mapToColumn("", Infer.Nulls, expression)
public inline fun <reified R> expr(infer: Infer = Infer.Nulls, noinline expression: RowExpression<T, R>): DataColumn<R> {
return df.mapToColumn("", infer, expression)
}

public inline infix fun <reified R> String.from(noinline expression: RowExpression<T, R>): Boolean =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ public abstract class CreateDataFrameDsl<T> : TraversePropertiesDsl {
body: (TraversePropertiesDsl.() -> Unit)? = null,
)

public inline fun <reified R> expr(noinline expression: (T) -> R): DataColumn<R> =
source.map { expression(it) }.toColumn()
public inline fun <reified R> expr(infer: Infer = Infer.Nulls, noinline expression: (T) -> R): DataColumn<R> =
source.map { expression(it) }.toColumn(infer = infer)

public inline fun <reified R> add(name: String, noinline expression: (T) -> R): Unit =
add(source.map { expression(it) }.toColumn(name, Infer.Nulls))
Expand All @@ -165,6 +165,9 @@ public abstract class CreateDataFrameDsl<T> : TraversePropertiesDsl {
public inline infix fun <reified R> KProperty<R>.from(noinline expression: (T) -> R): Unit =
add(columnName, expression)

public inline infix fun <reified R> String.from(inferType: InferType<T, R>): Unit =
add(DataColumn.createWithTypeInference(this, source.map { inferType.expression(it) }))

public inline infix fun <reified R> KProperty<R>.from(inferType: InferType<T, R>): Unit =
add(DataColumn.createWithTypeInference(columnName, source.map { inferType.expression(it) }))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,21 @@ class CreateDataFrameTests {
res["e"].type() shouldBe typeOf<Int>()
}

@Test
fun `create column with infer type`() {
val data: List<Any> = listOf(1, 2, 3)
val res = data.toDataFrame {
"e" from inferType { it }
expr(infer = Infer.Type) { it } into "d"
}

res["e"].type() shouldBe typeOf<Int>()
res["e"].kind() shouldBe ColumnKind.Value

res["d"].type() shouldBe typeOf<Int>()
res["d"].kind() shouldBe ColumnKind.Value
}

@Test
fun `preserve fields order`() {
class B(val x: Int, val c: String, d: Double) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns
import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.Infer
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.addAll
Expand Down Expand Up @@ -182,6 +183,7 @@ import org.jetbrains.kotlinx.dataframe.size
import org.jetbrains.kotlinx.dataframe.type
import org.jetbrains.kotlinx.dataframe.typeClass
import org.junit.Test
import java.lang.reflect.Type
import java.math.BigDecimal
import java.time.LocalDate
import kotlin.reflect.jvm.jvmErasure
Expand Down Expand Up @@ -919,6 +921,15 @@ class DataFrameTests : BaseTest() {
}
}

@Test
fun `add several columns with type inference`() {
val f: Any = 123
val df = typed.add {
expr(infer = Infer.Type) { f } into "f"
}
df["f"].type() shouldBe typeOf<Int>()
}

@Test
fun `remove one column`() {
val expected = listOf("name", "city", "weight")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ public class AddDsl<T>(@PublishedApi internal val df: DataFrame<T>) : ColumnsCon
noinline expression: RowExpression<T, R>
): Boolean = add(df.mapToColumn(name, infer, expression))

public inline fun <reified R> expr(noinline expression: RowExpression<T, R>): DataColumn<R> {
return df.mapToColumn("", Infer.Nulls, expression)
public inline fun <reified R> expr(infer: Infer = Infer.Nulls, noinline expression: RowExpression<T, R>): DataColumn<R> {
return df.mapToColumn("", infer, expression)
}

public inline infix fun <reified R> String.from(noinline expression: RowExpression<T, R>): Boolean =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ public abstract class CreateDataFrameDsl<T> : TraversePropertiesDsl {
body: (TraversePropertiesDsl.() -> Unit)? = null,
)

public inline fun <reified R> expr(noinline expression: (T) -> R): DataColumn<R> =
source.map { expression(it) }.toColumn()
public inline fun <reified R> expr(infer: Infer = Infer.Nulls, noinline expression: (T) -> R): DataColumn<R> =
source.map { expression(it) }.toColumn(infer = infer)

public inline fun <reified R> add(name: String, noinline expression: (T) -> R): Unit =
add(source.map { expression(it) }.toColumn(name, Infer.Nulls))
Expand All @@ -165,6 +165,9 @@ public abstract class CreateDataFrameDsl<T> : TraversePropertiesDsl {
public inline infix fun <reified R> KProperty<R>.from(noinline expression: (T) -> R): Unit =
add(columnName, expression)

public inline infix fun <reified R> String.from(inferType: InferType<T, R>): Unit =
add(DataColumn.createWithTypeInference(this, source.map { inferType.expression(it) }))

public inline infix fun <reified R> KProperty<R>.from(inferType: InferType<T, R>): Unit =
add(DataColumn.createWithTypeInference(columnName, source.map { inferType.expression(it) }))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,21 @@ class CreateDataFrameTests {
res["e"].type() shouldBe typeOf<Int>()
}

@Test
fun `create column with infer type`() {
val data: List<Any> = listOf(1, 2, 3)
val res = data.toDataFrame {
"e" from inferType { it }
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here, it's good it exists, but I think it should be done by default when using expr

expr(infer = Infer.Type) { it } into "d"
}

res["e"].type() shouldBe typeOf<Int>()
res["e"].kind() shouldBe ColumnKind.Value

res["d"].type() shouldBe typeOf<Int>()
res["d"].kind() shouldBe ColumnKind.Value
}

@Test
fun `preserve fields order`() {
class B(val x: Int, val c: String, d: Double) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.annotations.ColumnName
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema
import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns
import org.jetbrains.kotlinx.dataframe.api.GroupBy
import org.jetbrains.kotlinx.dataframe.api.Infer
import org.jetbrains.kotlinx.dataframe.api.ParserOptions
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.api.addAll
Expand Down Expand Up @@ -182,6 +183,7 @@ import org.jetbrains.kotlinx.dataframe.size
import org.jetbrains.kotlinx.dataframe.type
import org.jetbrains.kotlinx.dataframe.typeClass
import org.junit.Test
import java.lang.reflect.Type
import java.math.BigDecimal
import java.time.LocalDate
import kotlin.reflect.jvm.jvmErasure
Expand Down Expand Up @@ -919,6 +921,15 @@ class DataFrameTests : BaseTest() {
}
}

@Test
fun `add several columns with type inference`() {
val f: Any = 123
val df = typed.add {
expr(infer = Infer.Type) { f } into "f"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I actually expected this to be done by default. If you need to manually specify to infer types, I think people will just not do it. And then, it will be Any?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's consistent with add itself (default infer = Infer.Nulls) and most other usages of this enum. I think it makes sense. Looking at values to deduce type is a reflective operation and, while i didn't test it myself and decision wasn't made by me, i suppose it can be quite heavy + produce unexpected results (more precise type than needed or something). So i'd say keeping it as an option is a reasonable compromise

}
df["f"].type() shouldBe typeOf<Int>()
}

@Test
fun `remove one column`() {
val expected = listOf("name", "city", "weight")
Expand Down