Skip to content

Commit

Permalink
perf: Improve encoding performances with inlined union resolving
Browse files Browse the repository at this point in the history
  • Loading branch information
Chuckame committed May 9, 2024
1 parent e66ed3e commit 0ec51a5
Show file tree
Hide file tree
Showing 30 changed files with 567 additions and 587 deletions.
107 changes: 4 additions & 103 deletions api/avro4k-core.api
Original file line number Diff line number Diff line change
Expand Up @@ -26,25 +26,22 @@ public synthetic class com/github/avrokotlin/avro4k/AvroAlias$Impl : com/github/

public final class com/github/avrokotlin/avro4k/AvroBuilder {
public final fun build ()Lcom/github/avrokotlin/avro4k/AvroConfiguration;
public final fun getEncodedAs ()Lcom/github/avrokotlin/avro4k/EncodedAs;
public final fun getFieldNamingStrategy ()Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;
public final fun getImplicitNulls ()Z
public final fun getSerializersModule ()Lkotlinx/serialization/modules/SerializersModule;
public final fun setEncodedAs (Lcom/github/avrokotlin/avro4k/EncodedAs;)V
public final fun setFieldNamingStrategy (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;)V
public final fun setImplicitNulls (Z)V
public final fun setSerializersModule (Lkotlinx/serialization/modules/SerializersModule;)V
}

public final class com/github/avrokotlin/avro4k/AvroConfiguration {
public fun <init> ()V
public fun <init> (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZLcom/github/avrokotlin/avro4k/EncodedAs;)V
public synthetic fun <init> (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZLcom/github/avrokotlin/avro4k/EncodedAs;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
public fun <init> (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;Z)V
public synthetic fun <init> (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V
public final fun component1 ()Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;
public final fun component2 ()Z
public final fun component3 ()Lcom/github/avrokotlin/avro4k/EncodedAs;
public final fun copy (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZLcom/github/avrokotlin/avro4k/EncodedAs;)Lcom/github/avrokotlin/avro4k/AvroConfiguration;
public static synthetic fun copy$default (Lcom/github/avrokotlin/avro4k/AvroConfiguration;Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZLcom/github/avrokotlin/avro4k/EncodedAs;ILjava/lang/Object;)Lcom/github/avrokotlin/avro4k/AvroConfiguration;
public final fun copy (Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;Z)Lcom/github/avrokotlin/avro4k/AvroConfiguration;
public static synthetic fun copy$default (Lcom/github/avrokotlin/avro4k/AvroConfiguration;Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;ZILjava/lang/Object;)Lcom/github/avrokotlin/avro4k/AvroConfiguration;
public fun equals (Ljava/lang/Object;)Z
public final fun getFieldNamingStrategy ()Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy;
public final fun getImplicitNulls ()Z
Expand Down Expand Up @@ -181,97 +178,6 @@ public final class com/github/avrokotlin/avro4k/encoder/AvroEncoder$DefaultImpls
public static fun encodeSerializableValue (Lcom/github/avrokotlin/avro4k/encoder/AvroEncoder;Lkotlinx/serialization/SerializationStrategy;Ljava/lang/Object;)V
}

public abstract class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher {
public fun toString ()Ljava/lang/String;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$FirstArray : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$FirstArray;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$FirstMap : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$FirstMap;
}

public abstract class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher {
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Enum : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named {
public fun <init> (Ljava/lang/String;)V
public final fun component1 ()Ljava/lang/String;
public final fun copy (Ljava/lang/String;)Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Enum;
public static synthetic fun copy$default (Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Enum;Ljava/lang/String;ILjava/lang/Object;)Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Enum;
public fun equals (Ljava/lang/Object;)Z
public final fun getFullName ()Ljava/lang/String;
public fun hashCode ()I
public fun toString ()Ljava/lang/String;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$FirstEnum : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$FirstEnum;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$FirstFixed : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$FirstFixed;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Fixed : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named {
public fun <init> (Ljava/lang/String;)V
public final fun component1 ()Ljava/lang/String;
public final fun copy (Ljava/lang/String;)Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Fixed;
public static synthetic fun copy$default (Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Fixed;Ljava/lang/String;ILjava/lang/Object;)Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Fixed;
public fun equals (Ljava/lang/Object;)Z
public final fun getFullName ()Ljava/lang/String;
public fun hashCode ()I
public fun toString ()Ljava/lang/String;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Record : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named {
public fun <init> (Ljava/lang/String;)V
public final fun component1 ()Ljava/lang/String;
public final fun copy (Ljava/lang/String;)Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Record;
public static synthetic fun copy$default (Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Record;Ljava/lang/String;ILjava/lang/Object;)Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Named$Record;
public fun equals (Ljava/lang/Object;)Z
public final fun getFullName ()Ljava/lang/String;
public fun hashCode ()I
public fun toString ()Ljava/lang/String;
}

public abstract class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher {
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$BOOLEAN : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$BOOLEAN;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$BYTES : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$BYTES;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$DOUBLE : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$DOUBLE;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$FLOAT : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$FLOAT;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$INT : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$INT;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$LONG : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$LONG;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$NULL : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$NULL;
}

public final class com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$STRING : com/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/encoder/SchemaTypeMatcher$Scalar$STRING;
}

public abstract interface class com/github/avrokotlin/avro4k/schema/FieldNamingStrategy {
public static final field Builtins Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy$Builtins;
public abstract fun resolve (Lkotlinx/serialization/descriptors/SerialDescriptor;I)Ljava/lang/String;
Expand All @@ -280,11 +186,6 @@ public abstract interface class com/github/avrokotlin/avro4k/schema/FieldNamingS
public final class com/github/avrokotlin/avro4k/schema/FieldNamingStrategy$Builtins {
}

public final class com/github/avrokotlin/avro4k/schema/FieldNamingStrategy$Builtins$NoOp : com/github/avrokotlin/avro4k/schema/FieldNamingStrategy {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy$Builtins$NoOp;
public fun resolve (Lkotlinx/serialization/descriptors/SerialDescriptor;I)Ljava/lang/String;
}

public final class com/github/avrokotlin/avro4k/schema/FieldNamingStrategy$Builtins$SnakeCase : com/github/avrokotlin/avro4k/schema/FieldNamingStrategy {
public static final field INSTANCE Lcom/github/avrokotlin/avro4k/schema/FieldNamingStrategy$Builtins$SnakeCase;
public fun resolve (Lkotlinx/serialization/descriptors/SerialDescriptor;I)Ljava/lang/String;
Expand Down
2 changes: 1 addition & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ publishing {
pom {
val projectUrl = "https://github.com/avro-kotlin/avro4k"
name.set("avro4k-core")
description.set("Avro format support for kotlinx.serialization")
description.set("Avro binary format support for kotlin, built on top of kotlinx-serialization")
url.set(projectUrl)

scm {
Expand Down
27 changes: 10 additions & 17 deletions src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ import org.apache.avro.generic.GenericDatumReader
import org.apache.avro.io.DecoderFactory
import org.apache.avro.io.EncoderFactory
import org.apache.avro.reflect.ReflectDatumWriter
import org.apache.avro.util.WeakIdentityHashMap
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.InputStream
import java.io.OutputStream
import java.util.concurrent.ConcurrentHashMap

/**
* The goal of this class is to serialize and deserialize in avro binary format, not in GenericRecords.
Expand All @@ -44,7 +44,11 @@ public sealed class Avro(
public val configuration: AvroConfiguration,
public val serializersModule: SerializersModule,
) {
private val schemaCache: MutableMap<SerialDescriptor, Schema> = ConcurrentHashMap()
// We use the identity hash map because we could have multiple descriptors with the same name, especially
// when having 2 different version of the schema for the same name. kotlinx-serialization is instanciating the descriptors
// only once, so we are safe in the main use cases. Combined with weak references to avoid memory leaks.
private val schemaCache: MutableMap<SerialDescriptor, Schema> = WeakIdentityHashMap()

internal val recordResolver = RecordResolver(this)
internal val unionResolver = UnionResolver()
internal val enumResolver = EnumResolver()
Expand All @@ -68,7 +72,7 @@ public sealed class Avro(
return schemaCache.getOrPut(descriptor) {
lateinit var output: Schema
ValueVisitor(this) { output = it }.visitValue(descriptor)
return output
output
}
}

Expand All @@ -78,12 +82,7 @@ public sealed class Avro(
value: T,
outputStream: OutputStream,
) {
val avroEncoder =
when (configuration.encodedAs) {
EncodedAs.BINARY -> EncoderFactory.get().binaryEncoder(outputStream, null)
EncodedAs.JSON_COMPACT -> EncoderFactory.get().jsonEncoder(writerSchema, outputStream, false)
EncodedAs.JSON_PRETTY -> EncoderFactory.get().jsonEncoder(writerSchema, outputStream, true)
}
val avroEncoder = EncoderFactory.get().directBinaryEncoder(outputStream, null)
val genericData = encodeToGenericData(writerSchema, serializer, value)
ReflectDatumWriter<Any?>(writerSchema).write(genericData, avroEncoder)
avroEncoder.flush()
Expand Down Expand Up @@ -116,11 +115,7 @@ public sealed class Avro(
deserializer: DeserializationStrategy<T>,
inputStream: InputStream,
): T {
val avroDecoder =
when (configuration.encodedAs) {
EncodedAs.BINARY -> DecoderFactory.get().binaryDecoder(inputStream, null)
EncodedAs.JSON_COMPACT, EncodedAs.JSON_PRETTY -> DecoderFactory.get().jsonDecoder(writerSchema, inputStream)
}
val avroDecoder = DecoderFactory.get().directBinaryDecoder(inputStream, null)
val genericData = GenericDatumReader<Any?>(writerSchema).read(null, avroDecoder)
return decodeFromGenericData(writerSchema, deserializer, genericData)
}
Expand Down Expand Up @@ -155,14 +150,12 @@ public fun Avro(
public class AvroBuilder internal constructor(avro: Avro) {
public var fieldNamingStrategy: FieldNamingStrategy = avro.configuration.fieldNamingStrategy
public var implicitNulls: Boolean = avro.configuration.implicitNulls
public var encodedAs: EncodedAs = avro.configuration.encodedAs
public var serializersModule: SerializersModule = EmptySerializersModule()

public fun build(): AvroConfiguration =
AvroConfiguration(
fieldNamingStrategy = fieldNamingStrategy,
implicitNulls = implicitNulls,
encodedAs = encodedAs
implicitNulls = implicitNulls
)
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package com.github.avrokotlin.avro4k

import com.github.avrokotlin.avro4k.schema.FieldNamingStrategy
import kotlinx.serialization.ExperimentalSerializationApi

public data class AvroConfiguration(
/**
Expand All @@ -15,18 +14,4 @@ public data class AvroConfiguration(
* When set to `false`, during decoding, any missing value for a nullable field without default `null` value (e.g. `val field: Type?` without `= null`) is failing.
*/
val implicitNulls: Boolean = true,
/**
* The encoding format to use when encoding and decoding avro data. Default is [EncodedAs.BINARY].
*
* @see EncodedAs
*/
@ExperimentalSerializationApi
val encodedAs: EncodedAs = EncodedAs.BINARY,
)

@ExperimentalSerializationApi
public enum class EncodedAs {
BINARY,
JSON_COMPACT,
JSON_PRETTY,
}
)
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ import java.nio.ByteOrder
*
* [spec](https://avro.apache.org/docs/1.11.1/specification/#single-object-encoding)
*
* @param findSchemaByFingerprint a function to find a schema by its fingerprint, and returns null when not found
* @param schemaRegistry a function to find a schema by its fingerprint, and returns null when not found. You should use [SchemaNormalization.parsingFingerprint64] to generate the fingerprint.
*/
@ExperimentalSerializationApi
public class AvroSingleObject(
private val findSchemaByFingerprint: (Long) -> Schema?,
private val schemaRegistry: (fingerprint: Long) -> Schema?,
@PublishedApi
internal val avro: Avro = Avro,
) {
Expand All @@ -51,7 +51,7 @@ public class AvroSingleObject(
check(inputStream.read() == FORMAT_VERSION) { "Not a valid single-object avro format, bad version byte" }
val fingerprint = ByteBuffer.wrap(ByteArray(8).apply { inputStream.read(this) }).order(ByteOrder.LITTLE_ENDIAN).getLong()
val writerSchema =
findSchemaByFingerprint(fingerprint) ?: throw SerializationException("schema not found for the given object's schema fingerprint 0x${fingerprint.toString(16)}")
schemaRegistry(fingerprint) ?: throw SerializationException("schema not found for the given object's schema fingerprint 0x${fingerprint.toString(16)}")

return avro.decodeFromStream(writerSchema, deserializer, inputStream)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import java.nio.ByteBuffer

@OptIn(InternalSerializationApi::class)
internal abstract class AvroTaggedDecoder<Tag> : TaggedDecoder<Tag>(), AvroDecoder {
protected abstract val avro: Avro
internal abstract val avro: Avro

protected abstract val Tag.writerSchema: Schema

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package com.github.avrokotlin.avro4k.decoder
import com.github.avrokotlin.avro4k.Avro
import com.github.avrokotlin.avro4k.AvroAlias
import com.github.avrokotlin.avro4k.internal.IllegalIndexedAccessError
import com.github.avrokotlin.avro4k.schema.findAnnotation
import com.github.avrokotlin.avro4k.schema.nonNullSerialName
import com.github.avrokotlin.avro4k.schema.possibleSerializationSubclasses
import com.github.avrokotlin.avro4k.internal.findAnnotation
import com.github.avrokotlin.avro4k.internal.nonNullSerialName
import com.github.avrokotlin.avro4k.internal.possibleSerializationSubclasses
import kotlinx.serialization.DeserializationStrategy
import kotlinx.serialization.SerializationException
import kotlinx.serialization.descriptors.SerialDescriptor
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.github.avrokotlin.avro4k.encoder

import com.github.avrokotlin.avro4k.Avro
import com.github.avrokotlin.avro4k.schema.ensureTypeOf
import com.github.avrokotlin.avro4k.internal.ensureTypeOf
import kotlinx.serialization.descriptors.SerialDescriptor
import org.apache.avro.Schema
import org.apache.avro.generic.GenericArray
Expand Down
64 changes: 10 additions & 54 deletions src/main/kotlin/com/github/avrokotlin/avro4k/encoder/AvroEncoder.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,61 +21,17 @@ public interface AvroEncoder : Encoder {

@ExperimentalSerializationApi
public fun encodeFixed(value: GenericFixed)

/**
* Helps to encode a value in different ways depending on the type of the writer schema.
* Each encoder have to return the encoded value for the matched schema.
*
* @param kotlinTypeName represents the kotlin type name of the encoded value for debugging purposes as it's used in exceptions. This is not the written avro type name.
*/
@ExperimentalSerializationApi
public fun encodeValueResolved(
vararg encoders: Pair<SchemaTypeMatcher, (Schema) -> Any>,
kotlinTypeName: String,
)
}

@ExperimentalSerializationApi
public inline fun <reified T : Any> AvroEncoder.encodeValueResolved(vararg encoders: Pair<SchemaTypeMatcher, (Schema) -> Any>) {
encodeValueResolved(*encoders, kotlinTypeName = T::class.qualifiedName!!)
}

public sealed class SchemaTypeMatcher {
public sealed class Scalar : SchemaTypeMatcher() {
public object BOOLEAN : Scalar()

public object INT : Scalar()

public object LONG : Scalar()

public object FLOAT : Scalar()

public object DOUBLE : Scalar()

public object STRING : Scalar()

public object BYTES : Scalar()

public object NULL : Scalar()
}

public object FirstArray : SchemaTypeMatcher()

public object FirstMap : SchemaTypeMatcher()

public sealed class Named : SchemaTypeMatcher() {
public object FirstFixed : Named()

public object FirstEnum : Named()

public data class Fixed(val fullName: String) : Named()

public data class Enum(val fullName: String) : Named()

public data class Record(val fullName: String) : Named()
}

override fun toString(): String {
return this::class.simpleName!!
}
public inline fun <T : Any> AvroEncoder.encodeResolvingUnion(
error: () -> Throwable,
resolver: (Schema) -> T?,
): T {
val schema = currentWriterSchema
return if (schema.type == Schema.Type.UNION) {
schema.types.firstNotNullOfOrNull(resolver)
} else {
resolver(schema)
} ?: throw error()
}
Loading

0 comments on commit 0ec51a5

Please sign in to comment.