Skip to content

Commit

Permalink
perf: Direct decoding & encoding
Browse files Browse the repository at this point in the history
Also added logicalType `char` for CHAR kotlin primitive type
  • Loading branch information
Chuckame committed May 28, 2024
1 parent 41dfff3 commit d0a7d7e
Show file tree
Hide file tree
Showing 29 changed files with 2,475 additions and 386 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ yourAvroInstance.schema<Pizza>()
| `Long` | `long` | `int`, `float`, `double`, `string` | | |
| `Float` | `float` | `double`, `string` | | |
| `Double` | `double` | `float`, `string` | | |
| `Char` | `int` | `string` | | The value serialized is the char code. When reading from a `string`, requires exactly 1 char |
| `Char` | `int` | `string` | `char` | The value serialized is the char code. When reading from a `string`, requires exactly 1 char |
| `String` | `string` | `bytes`, `fixed` | | |
| `ByteArray` | `bytes` | `string`, `fixed` | | |
| `Map<*, *>` | `map` | | | The map key must be string-able. Mainly everything is string-able except null and composite types (collection, data classes) |
Expand Down
47 changes: 35 additions & 12 deletions api/avro4k-core.api
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,26 @@ public final class com/github/avrokotlin/avro4k/AvroBuilder {
public final fun getFieldNamingStrategy ()Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;
public final fun getImplicitNulls ()Z
public final fun getSerializersModule ()Lkotlinx/serialization/modules/SerializersModule;
public final fun getValidateSerialization ()Z
public final fun setFieldNamingStrategy (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;)V
public final fun setImplicitNulls (Z)V
public final fun setSerializersModule (Lkotlinx/serialization/modules/SerializersModule;)V
public final fun setValidateSerialization (Z)V
}

public final class com/github/avrokotlin/avro4k/AvroConfiguration {
public fun <init> ()V
public fun <init> (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;Z)V
public synthetic fun <init> (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V
public fun <init> (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZZ)V
public synthetic fun <init> (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZZILkotlin/jvm/internal/DefaultConstructorMarker;)V
public final fun component1 ()Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;
public final fun component2 ()Z
public final fun copy (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;Z)Lcom/github/avrokotlin/avro4k/AvroConfiguration;
public static synthetic fun copy$default (Lcom/github/avrokotlin/avro4k/AvroConfiguration;Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZILjava/lang/Object;)Lcom/github/avrokotlin/avro4k/AvroConfiguration;
public final fun component3 ()Z
public final fun copy (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZZ)Lcom/github/avrokotlin/avro4k/AvroConfiguration;
public static synthetic fun copy$default (Lcom/github/avrokotlin/avro4k/AvroConfiguration;Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZZILjava/lang/Object;)Lcom/github/avrokotlin/avro4k/AvroConfiguration;
public fun equals (Ljava/lang/Object;)Z
public final fun getFieldNamingStrategy ()Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;
public final fun getImplicitNulls ()Z
public final fun getValidateSerialization ()Z
public fun hashCode ()I
public fun toString ()Ljava/lang/String;
}
Expand Down Expand Up @@ -204,9 +208,31 @@ public final class com/github/avrokotlin/avro4k/decoder/AvroDecoder$DefaultImpls
}

public final class com/github/avrokotlin/avro4k/decoder/AvroDecoderKt {
public static final fun decodeResolvingUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
public static final fun decodeResolvingUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lorg/apache/avro/Schema;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
public static final fun resolveUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lorg/apache/avro/Schema;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
public static final fun decodeAnyResolvingUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
public static final fun decodeBooleanResolvingUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)Z
public static final fun decodeCharResolvingUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)C
public static final fun decodeDoubleResolvingUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)D
public static final fun decodeFloatResolvingUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)F
public static final fun decodeIntResolvingUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)I
public static final fun decodeLongResolvingUnion (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)J
public static final fun findValueDecoder (Lcom/github/avrokotlin/avro4k/decoder/AvroDecoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
}

public abstract interface class com/github/avrokotlin/avro4k/decoder/CharSupplier {
public abstract fun asChar ()C
}

public abstract interface class com/github/avrokotlin/avro4k/decoder/FloatSupplier {
public abstract fun asFloat ()F
}

public abstract interface class com/github/avrokotlin/avro4k/decoder/UnionDecoder : com/github/avrokotlin/avro4k/decoder/AvroDecoder {
public abstract fun decodeAndResolveUnion ()V
}

public final class com/github/avrokotlin/avro4k/decoder/UnionDecoder$DefaultImpls {
public static fun decodeNullableSerializableValue (Lcom/github/avrokotlin/avro4k/decoder/UnionDecoder;Lkotlinx/serialization/DeserializationStrategy;)Ljava/lang/Object;
public static fun decodeSerializableValue (Lcom/github/avrokotlin/avro4k/decoder/UnionDecoder;Lkotlinx/serialization/DeserializationStrategy;)Ljava/lang/Object;
}

public abstract interface class com/github/avrokotlin/avro4k/encoder/AvroEncoder : kotlinx/serialization/encoding/Encoder {
Expand All @@ -226,14 +252,11 @@ public final class com/github/avrokotlin/avro4k/encoder/AvroEncoder$DefaultImpls

public final class com/github/avrokotlin/avro4k/encoder/AvroEncoderKt {
public static final fun encodeResolvingUnion (Lcom/github/avrokotlin/avro4k/encoder/AvroEncoder;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
public static final fun encodeResolvingUnion (Lcom/github/avrokotlin/avro4k/encoder/AvroEncoder;Lorg/apache/avro/Schema;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
public static final fun resolveUnion (Lcom/github/avrokotlin/avro4k/encoder/AvroEncoder;Lorg/apache/avro/Schema;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
public static final fun resolveUnion (Lcom/github/avrokotlin/avro4k/encoder/AvroEncoder;Lorg/apache/avro/Schema;Lkotlin/jvm/functions/Function0;Lkotlin/jvm/functions/Function1;)Ljava/lang/Object;
}

public abstract interface class com/github/avrokotlin/avro4k/encoder/UnionEncoder : com/github/avrokotlin/avro4k/encoder/AvroEncoder {
public abstract fun getCurrentWriterSchema ()Lorg/apache/avro/Schema;
public abstract fun selectUnionIndex (I)V
public abstract fun setCurrentWriterSchema (Lorg/apache/avro/Schema;)V
public abstract fun encodeUnionIndex (I)V
}

public final class com/github/avrokotlin/avro4k/encoder/UnionEncoder$DefaultImpls {
Expand Down
1 change: 1 addition & 0 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies {
api(libs.apache.avro)
api(libs.kotlinx.serialization.core)
implementation(libs.kotlinx.serialization.json)
implementation(libs.okio)
testImplementation(libs.kotest.junit5)
testImplementation(libs.kotest.core)
testImplementation(libs.kotest.json)
Expand Down
1 change: 1 addition & 0 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencyResolutionManagement {
version("jvm", "21")

library("apache-avro", "org.apache.avro", "avro").version("1.11.3")
library("okio", "com.squareup.okio", "okio").version("3.9.0")

val kotlinxSerialization = "1.7.0-RC"
library("kotlinx-serialization-core", "org.jetbrains.kotlinx", "kotlinx-serialization-core").version(kotlinxSerialization)
Expand Down
52 changes: 38 additions & 14 deletions src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
package com.github.avrokotlin.avro4k

import com.github.avrokotlin.avro4k.decoder.AvroValueDecoder
import com.github.avrokotlin.avro4k.decoder.AvroGenericValueDecoder
import com.github.avrokotlin.avro4k.decoder.direct.AvroValueDirectDecoder
import com.github.avrokotlin.avro4k.encoder.AvroValueEncoder
import com.github.avrokotlin.avro4k.encoder.direct.AvroValueDirectEncoder
import com.github.avrokotlin.avro4k.internal.EnumResolver
import com.github.avrokotlin.avro4k.internal.PolymorphicResolver
import com.github.avrokotlin.avro4k.internal.RecordResolver
import com.github.avrokotlin.avro4k.schema.FieldNamingStrategy
import com.github.avrokotlin.avro4k.schema.ValueVisitor
Expand All @@ -24,15 +27,13 @@ import kotlinx.serialization.modules.SerializersModule
import kotlinx.serialization.modules.contextual
import kotlinx.serialization.modules.overwriteWith
import kotlinx.serialization.serializer
import okio.Buffer
import org.apache.avro.Schema
import org.apache.avro.generic.GenericContainer
import org.apache.avro.generic.GenericDatumReader
import org.apache.avro.io.DecoderFactory
import org.apache.avro.io.EncoderFactory
import org.apache.avro.reflect.ReflectDatumWriter
import org.apache.avro.util.WeakIdentityHashMap
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.InputStream
import java.io.OutputStream

Expand All @@ -49,6 +50,7 @@ public sealed class Avro(
private val schemaCache: MutableMap<SerialDescriptor, Schema> = WeakIdentityHashMap()

internal val recordResolver = RecordResolver(this)
internal val polymorphicResolver = PolymorphicResolver(this.serializersModule)
internal val enumResolver = EnumResolver()

public companion object Default : Avro(
Expand Down Expand Up @@ -80,9 +82,18 @@ public sealed class Avro(
value: T,
outputStream: OutputStream,
) {
val avroEncoder = EncoderFactory.get().directBinaryEncoder(outputStream, null)
val genericData = encodeToGenericData(writerSchema, serializer, value)
ReflectDatumWriter<Any?>(writerSchema).write(genericData, avroEncoder)
val avroEncoder =
EncoderFactory.get().directBinaryEncoder(outputStream, null).let {
if (configuration.validateSerialization) {
EncoderFactory.get().validatingEncoder(writerSchema, it)
} else {
it
}
}

AvroValueDirectEncoder(writerSchema, this, avroEncoder)
.encodeSerializableValue(serializer, value)

avroEncoder.flush()
}

Expand All @@ -91,9 +102,10 @@ public sealed class Avro(
serializer: SerializationStrategy<T>,
value: T,
): ByteArray {
val outputStream = ByteArrayOutputStream()
val buffer = Buffer()
val outputStream = buffer.outputStream()
encodeToStream(writerSchema, serializer, value, outputStream)
return outputStream.toByteArray()
return buffer.readByteArray()
}

@ExperimentalSerializationApi
Expand All @@ -115,9 +127,17 @@ public sealed class Avro(
deserializer: DeserializationStrategy<T>,
inputStream: InputStream,
): T {
val avroDecoder = DecoderFactory.get().directBinaryDecoder(inputStream, null)
val genericData = GenericDatumReader<Any?>(writerSchema).read(null, avroDecoder)
return decodeFromGenericData(writerSchema, deserializer, genericData)
val avroDecoder =
DecoderFactory.get().directBinaryDecoder(inputStream, null).let {
if (configuration.validateSerialization) {
DecoderFactory.get().validatingDecoder(writerSchema, it)
} else {
it
}
}

return AvroValueDirectDecoder(writerSchema, this, avroDecoder)
.decodeSerializableValue(deserializer)
}

public fun <T> decodeFromByteArray(
Expand All @@ -134,7 +154,7 @@ public sealed class Avro(
deserializer: DeserializationStrategy<T>,
value: Any?,
): T {
return AvroValueDecoder(this, value, writerSchema)
return AvroGenericValueDecoder(this, value, writerSchema)
.decodeSerializableValue(deserializer)
}
}
Expand All @@ -154,12 +174,16 @@ public class AvroBuilder internal constructor(avro: Avro) {

@ExperimentalSerializationApi
public var implicitNulls: Boolean = avro.configuration.implicitNulls

@ExperimentalSerializationApi
public var validateSerialization: Boolean = avro.configuration.validateSerialization
public var serializersModule: SerializersModule = EmptySerializersModule()

public fun build(): AvroConfiguration =
AvroConfiguration(
fieldNamingStrategy = fieldNamingStrategy,
implicitNulls = implicitNulls
implicitNulls = implicitNulls,
validateSerialization = validateSerialization
)
}

Expand Down
13 changes: 13 additions & 0 deletions src/main/kotlin/com/github/avrokotlin/avro4k/AvroConfiguration.kt
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,21 @@ public data class AvroConfiguration(
val fieldNamingStrategy: FieldNamingStrategy = FieldNamingStrategy.Builtins.NoOp,
/**
* By default, set to `true`, the nullable fields that haven't any default value are set as null if the value is missing. It also adds `"default": null` to those fields when generating schema using avro4k.
*
* When set to `false`, during decoding, any missing value for a nullable field without default `null` value (e.g. `val field: Type?` without `= null`) is failing.
*/
@ExperimentalSerializationApi
val implicitNulls: Boolean = true,
/**
* **To be removed when binary support is stable.**
*
* Set it to `true` to enable validation in case of failure, mainly for debug purpose.
*
* By default, to `false`.
*
* @see [org.apache.avro.io.ValidatingEncoder]
* @see [org.apache.avro.io.ValidatingDecoder]
*/
@ExperimentalSerializationApi
val validateSerialization: Boolean = false,
)
109 changes: 86 additions & 23 deletions src/main/kotlin/com/github/avrokotlin/avro4k/decoder/AvroDecoder.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.encoding.Decoder
import org.apache.avro.Schema
import org.apache.avro.generic.GenericFixed
import java.util.function.BooleanSupplier
import java.util.function.DoubleSupplier
import java.util.function.IntSupplier
import java.util.function.LongSupplier

public interface AvroDecoder : Decoder {
/**
Expand All @@ -28,38 +32,97 @@ public interface AvroDecoder : Decoder {
public fun decodeValue(): Any
}

@PublishedApi
internal interface UnionDecoder : AvroDecoder {
/**
* Decode the union schema and set the resolved type in [currentWriterSchema].
*/
fun decodeAndResolveUnion()
}

@ExperimentalSerializationApi
public inline fun <T : Any> AvroDecoder.decodeResolvingUnion(
public inline fun <T : Any> AvroDecoder.decodeAnyResolvingUnion(
error: () -> Throwable,
resolver: (Schema) -> (() -> T)?,
): T {
val schema = currentWriterSchema
return decodeResolvingUnion(schema, error, resolver)
return findValueDecoder(error, resolver).invoke()
}

@PublishedApi
internal inline fun <T : Any> AvroDecoder.decodeResolvingUnion(
schema: Schema,
@ExperimentalSerializationApi
public inline fun AvroDecoder.decodeIntResolvingUnion(
error: () -> Throwable,
resolver: (Schema) -> (() -> T)?,
): T {
return if (schema.type == Schema.Type.UNION) {
resolveUnion(schema, resolver)
} else {
resolver(schema)?.invoke()
} ?: throw error()
resolver: (Schema) -> IntSupplier?,
): Int {
return findValueDecoder(error, resolver).asInt
}

@ExperimentalSerializationApi
public inline fun AvroDecoder.decodeLongResolvingUnion(
error: () -> Throwable,
resolver: (Schema) -> LongSupplier?,
): Long {
return findValueDecoder(error, resolver).asLong
}

@ExperimentalSerializationApi
public inline fun AvroDecoder.decodeBooleanResolvingUnion(
error: () -> Throwable,
resolver: (Schema) -> BooleanSupplier?,
): Boolean {
return findValueDecoder(error, resolver).asBoolean
}

@ExperimentalSerializationApi
public inline fun AvroDecoder.decodeFloatResolvingUnion(
error: () -> Throwable,
resolver: (Schema) -> FloatSupplier?,
): Float {
return findValueDecoder(error, resolver).asFloat()
}

@ExperimentalSerializationApi
public inline fun AvroDecoder.decodeDoubleResolvingUnion(
error: () -> Throwable,
resolver: (Schema) -> DoubleSupplier?,
): Double {
return findValueDecoder(error, resolver).asDouble
}

@ExperimentalSerializationApi
public inline fun AvroDecoder.decodeCharResolvingUnion(
error: () -> Throwable,
resolver: (Schema) -> CharSupplier?,
): Char {
return findValueDecoder(error, resolver).asChar()
}

@PublishedApi
internal inline fun <T> AvroDecoder.resolveUnion(
schema: Schema,
resolver: (Schema) -> (() -> T)?,
): T? {
for (index in schema.types.indices) {
val subSchema = schema.types[index]
resolver(subSchema)?.let {
return it.invoke()
internal inline fun <T : Any> AvroDecoder.findValueDecoder(
error: () -> Throwable,
resolver: (Schema) -> T?,
): T {
val schema = currentWriterSchema

val foundResolver =
if (schema.type == Schema.Type.UNION) {
if (this is UnionDecoder) {
decodeAndResolveUnion()
resolver(currentWriterSchema)
} else {
currentWriterSchema.types.firstNotNullOfOrNull(resolver)
}
} else {
resolver(schema)
}
}
return null
return foundResolver ?: throw error()
}

@ExperimentalSerializationApi
public fun interface FloatSupplier {
public fun asFloat(): Float
}

@ExperimentalSerializationApi
public fun interface CharSupplier {
public fun asChar(): Char
}
Loading

0 comments on commit d0a7d7e

Please sign in to comment.