From 044ccc24eaebb85ee9e0063b434445cb01d4f32a Mon Sep 17 00:00:00 2001 From: Chuckame Date: Tue, 23 May 2023 11:19:12 +0200 Subject: [PATCH] feat(schema): Implicit nulls (false by default) --- README.md | 23 +++++++++ .../com/github/avrokotlin/avro4k/Avro.kt | 14 ++++-- .../avrokotlin/avro4k/AvroConfiguration.kt | 9 +++- .../avro4k/schema/ClassSchemaFor.kt | 49 +++++++++++-------- .../avrokotlin/avro4k/schema/SchemaFor.kt | 45 ++++++++++------- .../avro4k/schema/UnionSchemaFor.kt | 5 +- .../schema/NullableWithDefaultsSchemaTest.kt | 18 +++++++ .../resources/nullables-with-defaults.json | 23 +++++++++ 8 files changed, 143 insertions(+), 43 deletions(-) create mode 100644 src/test/kotlin/com/github/avrokotlin/avro4k/schema/NullableWithDefaultsSchemaTest.kt create mode 100644 src/test/resources/nullables-with-defaults.json diff --git a/README.md b/README.md index ecb0f600..85fc67b6 100644 --- a/README.md +++ b/README.md @@ -415,6 +415,29 @@ Would result in the following schema: } ``` +### Nullable fields, optional fields and compatibility + +#### TL;DR +To make your nullable fields optional (i.e. add `default: null` to every nullable field that has no other explicit default), so that they can later be removed without breaking compatibility checks, +set `implicitNulls` to `true` in the configuration. Example: +```kotlin +Avro(AvroConfiguration(implicitNulls = true)) +``` + +#### Longer story + +With Avro, you can have nullable fields and optional fields; both are taken into account for compatibility checking when using the schema registry. + +But if you want to remove a nullable field that is not optional, depending on the compatibility mode, the change may be incompatible because of the missing default value. + +- What is an optional field? +> An optional field is a field that has a *default* value, like an int with a default of `-1`. + +- What is a nullable field? 
+> A nullable field is a field whose type union contains the `null` type, but **it is not an optional field unless its `default` value is set to `null`**. + +So, to mark a nullable field as optional and ease Avro contract evolution with respect to compatibility checks, set its `default` to `null`. + ## Types diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt index 6e984525..aad93b3b 100644 --- a/src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt @@ -4,10 +4,18 @@ package com.github.avrokotlin.avro4k import com.github.avrokotlin.avro4k.decoder.RootRecordDecoder import com.github.avrokotlin.avro4k.encoder.RootRecordEncoder -import com.github.avrokotlin.avro4k.io.* +import com.github.avrokotlin.avro4k.io.AvroDecodeFormat +import com.github.avrokotlin.avro4k.io.AvroEncodeFormat +import com.github.avrokotlin.avro4k.io.AvroFormat +import com.github.avrokotlin.avro4k.io.AvroInputStream +import com.github.avrokotlin.avro4k.io.AvroOutputStream import com.github.avrokotlin.avro4k.schema.schemaFor import com.github.avrokotlin.avro4k.serializer.UUIDSerializer -import kotlinx.serialization.* +import kotlinx.serialization.BinaryFormat +import kotlinx.serialization.DeserializationStrategy +import kotlinx.serialization.ExperimentalSerializationApi +import kotlinx.serialization.SerialFormat +import kotlinx.serialization.SerializationStrategy import kotlinx.serialization.descriptors.SerialDescriptor import kotlinx.serialization.modules.SerializersModule import kotlinx.serialization.modules.contextual @@ -259,7 +267,7 @@ class Avro( serializersModule, descriptor, descriptor.annotations, - configuration.namingStrategy, + configuration, mutableMapOf() ).schema() diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/AvroConfiguration.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/AvroConfiguration.kt index b441f5d9..3ac834db 100644 --- 
a/src/main/kotlin/com/github/avrokotlin/avro4k/AvroConfiguration.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/AvroConfiguration.kt @@ -3,4 +3,11 @@ package com.github.avrokotlin.avro4k import com.github.avrokotlin.avro4k.schema.DefaultNamingStrategy import com.github.avrokotlin.avro4k.schema.NamingStrategy -data class AvroConfiguration(val namingStrategy: NamingStrategy = DefaultNamingStrategy) +data class AvroConfiguration( + val namingStrategy: NamingStrategy = DefaultNamingStrategy, + /** + * By default, during decoding, any missing value for a nullable field without default [null] value (e.g. `val field: Type?` without `= null`) is failing. + * When set to [true], the nullable fields that haven't any default value are set as null if the value is missing. It also adds `"default": null` to those fields when generating schema using avro4k. + */ + val implicitNulls: Boolean = false, +) diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/ClassSchemaFor.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/ClassSchemaFor.kt index 84b15e03..6a0c97e9 100644 --- a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/ClassSchemaFor.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/ClassSchemaFor.kt @@ -2,6 +2,7 @@ package com.github.avrokotlin.avro4k.schema import com.github.avrokotlin.avro4k.AnnotationExtractor import com.github.avrokotlin.avro4k.Avro +import com.github.avrokotlin.avro4k.AvroConfiguration import com.github.avrokotlin.avro4k.AvroProp import com.github.avrokotlin.avro4k.RecordNaming import kotlinx.serialization.ExperimentalSerializationApi @@ -22,7 +23,7 @@ import org.apache.avro.SchemaBuilder @ExperimentalSerializationApi class ClassSchemaFor( private val descriptor: SerialDescriptor, - private val namingStrategy: NamingStrategy, + private val configuration: AvroConfiguration, private val serializersModule: SerializersModule, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -74,12 +75,12 @@ class 
ClassSchemaFor( val fieldDescriptor = descriptor.getElementDescriptor(index) val annos = AnnotationExtractor(descriptor.getElementAnnotations( index)) - val fieldNaming = RecordNaming(descriptor, index, namingStrategy) + val fieldNaming = RecordNaming(descriptor, index, configuration.namingStrategy) val schema = schemaFor( serializersModule, fieldDescriptor, descriptor.getElementAnnotations(index), - namingStrategy, + configuration, resolvedSchemas ).schema() @@ -89,7 +90,7 @@ class ClassSchemaFor( val (size, name) = when (val a = annos.fixed()) { null -> { val fieldAnnos = AnnotationExtractor(fieldDescriptor.annotations) - val n = RecordNaming(fieldDescriptor, namingStrategy) + val n = RecordNaming(fieldDescriptor, configuration.namingStrategy) when (val b = fieldAnnos.fixed()) { null -> 0 to n.name else -> b to n.name @@ -115,27 +116,37 @@ class ClassSchemaFor( else -> schemaOrFixed.overrideNamespace(ns) } - val default: Any? = annos.default()?.let { - when { - it == Avro.NULL -> Schema.Field.NULL_DEFAULT_VALUE - schemaWithResolvedNamespace.extractNonNull().type in listOf( - Schema.Type.FIXED, - Schema.Type.BYTES, - Schema.Type.STRING, - Schema.Type.ENUM - ) -> it - else -> json.parseToJsonElement(it).convertToAvroDefault() - } - } + val default: Any? 
= getDefaultValue(annos, schemaWithResolvedNamespace, fieldDescriptor) val field = Schema.Field(fieldNaming.name, schemaWithResolvedNamespace, annos.doc(), default) - val props = this.descriptor.getElementAnnotations(index).filterIsInstance() - props.forEach { field.addProp(it.key, it.value) } + this.descriptor.getElementAnnotations(index) + .filterIsInstance() + .forEach { field.addProp(it.key, it.value) } annos.aliases().forEach { field.addAlias(it) } return field } + private fun getDefaultValue( + annos: AnnotationExtractor, + schemaWithResolvedNamespace: Schema, + fieldDescriptor: SerialDescriptor + ) = annos.default()?.let { annotationDefaultValue -> + when { + annotationDefaultValue == Avro.NULL -> Schema.Field.NULL_DEFAULT_VALUE + schemaWithResolvedNamespace.extractNonNull().type in listOf( + Schema.Type.FIXED, + Schema.Type.BYTES, + Schema.Type.STRING, + Schema.Type.ENUM + ) -> annotationDefaultValue + + else -> json.parseToJsonElement(annotationDefaultValue).convertToAvroDefault() + } + } ?: if (configuration.implicitNulls && fieldDescriptor.isNullable) { + Schema.Field.NULL_DEFAULT_VALUE + } else null + private fun JsonElement.convertToAvroDefault() : Any{ return when(this){ is JsonNull -> JsonProperties.NULL_VALUE @@ -155,6 +166,4 @@ class ClassSchemaFor( } } } - - } diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/SchemaFor.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/SchemaFor.kt index ddb8d336..bd8916f6 100644 --- a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/SchemaFor.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/SchemaFor.kt @@ -2,6 +2,7 @@ package com.github.avrokotlin.avro4k.schema import com.github.avrokotlin.avro4k.AnnotationExtractor import com.github.avrokotlin.avro4k.Avro +import com.github.avrokotlin.avro4k.AvroConfiguration import com.github.avrokotlin.avro4k.RecordNaming import kotlinx.serialization.ExperimentalSerializationApi import kotlinx.serialization.InternalSerializationApi @@ -64,7 
+65,7 @@ class EnumSchemaFor( @ExperimentalSerializationApi class PairSchemaFor(private val descriptor: SerialDescriptor, - private val namingStrategy: NamingStrategy, + private val configuration: AvroConfiguration, private val serializersModule: SerializersModule, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -74,14 +75,14 @@ class PairSchemaFor(private val descriptor: SerialDescriptor, serializersModule, descriptor.getElementDescriptor(0), descriptor.getElementAnnotations(0), - namingStrategy, + configuration, resolvedSchemas ) val b = schemaFor( serializersModule, descriptor.getElementDescriptor(1), descriptor.getElementAnnotations(1), - namingStrategy, + configuration, resolvedSchemas ) return SchemaBuilder.unionOf() @@ -91,10 +92,11 @@ class PairSchemaFor(private val descriptor: SerialDescriptor, .endUnion() } } + @ExperimentalSerializationApi class ListSchemaFor(private val descriptor: SerialDescriptor, private val serializersModule: SerializersModule, - private val namingStrategy: NamingStrategy, + private val configuration: AvroConfiguration, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -107,7 +109,7 @@ class ListSchemaFor(private val descriptor: SerialDescriptor, val elementSchema = schemaFor(serializersModule, elementType, descriptor.getElementAnnotations(0), - namingStrategy, + configuration, resolvedSchemas ).schema() return Schema.createArray(elementSchema) @@ -115,10 +117,11 @@ class ListSchemaFor(private val descriptor: SerialDescriptor, } } } + @ExperimentalSerializationApi class MapSchemaFor(private val descriptor: SerialDescriptor, private val serializersModule: SerializersModule, - private val namingStrategy: NamingStrategy, + private val configuration: AvroConfiguration, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -131,25 +134,31 @@ class MapSchemaFor(private val descriptor: SerialDescriptor, serializersModule, valueType, descriptor.getElementAnnotations(1), - namingStrategy, + configuration, 
resolvedSchemas ).schema() return Schema.createMap(valueSchema) } + else -> throw RuntimeException("Avro only supports STRING as the key type in a MAP") } } } + @ExperimentalSerializationApi -class NullableSchemaFor(private val schemaFor: SchemaFor, private val annotations : List) : SchemaFor { +class NullableSchemaFor( + private val schemaFor: SchemaFor, + private val annotations: List, +) : SchemaFor { - private val nullFirst by lazy{ + private val nullFirst by lazy { //The default value can only be of the first type in the union definition. //Therefore we have to check the default value in order to decide the order of types within the union. //If no default is set, or if the default value is of type "null", nulls will be first. val default = AnnotationExtractor(annotations).default() default == null || default == Avro.NULL } + override fun schema(): Schema { val elementSchema = schemaFor.schema() val nullSchema = SchemaBuilder.builder().nullType() @@ -162,7 +171,7 @@ class NullableSchemaFor(private val schemaFor: SchemaFor, private val annotation fun schemaFor(serializersModule: SerializersModule, descriptor: SerialDescriptor, annos: List, - namingStrategy: NamingStrategy, + configuration: AvroConfiguration, resolvedSchemas: MutableMap ): SchemaFor { @@ -173,7 +182,7 @@ fun schemaFor(serializersModule: SerializersModule, } else descriptor val schemaFor: SchemaFor = when (underlying) { - is AvroDescriptor -> SchemaFor.const(underlying.schema(annos, serializersModule, namingStrategy)) + is AvroDescriptor -> SchemaFor.const(underlying.schema(annos, serializersModule, configuration.namingStrategy)) else -> when (descriptor.unwrapValueClass.kind) { PrimitiveKind.STRING -> SchemaFor.StringSchemaFor PrimitiveKind.LONG -> SchemaFor.LongSchemaFor @@ -193,16 +202,18 @@ fun schemaFor(serializersModule: SerializersModule, "Contextual or default serializer not found for $descriptor " }, annos, - namingStrategy, + configuration, resolvedSchemas ) + StructureKind.CLASS, 
StructureKind.OBJECT -> when (descriptor.serialName) { - "kotlin.Pair" -> PairSchemaFor(descriptor, namingStrategy, serializersModule, resolvedSchemas) - else -> ClassSchemaFor(descriptor, namingStrategy, serializersModule, resolvedSchemas) + "kotlin.Pair" -> PairSchemaFor(descriptor, configuration, serializersModule, resolvedSchemas) + else -> ClassSchemaFor(descriptor, configuration, serializersModule, resolvedSchemas) } - StructureKind.LIST -> ListSchemaFor(descriptor, serializersModule, namingStrategy, resolvedSchemas) - StructureKind.MAP -> MapSchemaFor(descriptor, serializersModule, namingStrategy, resolvedSchemas) - is PolymorphicKind -> UnionSchemaFor(descriptor, namingStrategy, serializersModule, resolvedSchemas) + + StructureKind.LIST -> ListSchemaFor(descriptor, serializersModule, configuration, resolvedSchemas) + StructureKind.MAP -> MapSchemaFor(descriptor, serializersModule, configuration, resolvedSchemas) + is PolymorphicKind -> UnionSchemaFor(descriptor, configuration, serializersModule, resolvedSchemas) else -> throw SerializationException("Unsupported type ${descriptor.serialName} of ${descriptor.kind}") } } diff --git a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/UnionSchemaFor.kt b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/UnionSchemaFor.kt index 42591ce2..ae51eb36 100644 --- a/src/main/kotlin/com/github/avrokotlin/avro4k/schema/UnionSchemaFor.kt +++ b/src/main/kotlin/com/github/avrokotlin/avro4k/schema/UnionSchemaFor.kt @@ -1,5 +1,6 @@ package com.github.avrokotlin.avro4k.schema +import com.github.avrokotlin.avro4k.AvroConfiguration import com.github.avrokotlin.avro4k.RecordNaming import com.github.avrokotlin.avro4k.possibleSerializationSubclasses import kotlinx.serialization.ExperimentalSerializationApi @@ -10,7 +11,7 @@ import org.apache.avro.Schema @ExperimentalSerializationApi class UnionSchemaFor( private val descriptor: SerialDescriptor, - private val namingStrategy: NamingStrategy, + private val configuration: 
AvroConfiguration, private val serializersModule: SerializersModule, private val resolvedSchemas: MutableMap ) : SchemaFor { @@ -19,7 +20,7 @@ class UnionSchemaFor( descriptor.possibleSerializationSubclasses(serializersModule).sortedBy { it.serialName } return Schema.createUnion( leafSerialDescriptors.map { - ClassSchemaFor(it, namingStrategy, serializersModule, resolvedSchemas).schema() + ClassSchemaFor(it, configuration, serializersModule, resolvedSchemas).schema() } ) } diff --git a/src/test/kotlin/com/github/avrokotlin/avro4k/schema/NullableWithDefaultsSchemaTest.kt b/src/test/kotlin/com/github/avrokotlin/avro4k/schema/NullableWithDefaultsSchemaTest.kt new file mode 100644 index 00000000..4fc76018 --- /dev/null +++ b/src/test/kotlin/com/github/avrokotlin/avro4k/schema/NullableWithDefaultsSchemaTest.kt @@ -0,0 +1,18 @@ +package com.github.avrokotlin.avro4k.schema + +import com.github.avrokotlin.avro4k.Avro +import com.github.avrokotlin.avro4k.AvroConfiguration +import io.kotest.core.spec.style.FunSpec +import io.kotest.matchers.shouldBe +import kotlinx.serialization.Serializable + +class NullableWithDefaultsSchemaTest : FunSpec({ + test("generate null as Union[T, Null] with default null") { + val expected = org.apache.avro.Schema.Parser().parse(javaClass.getResourceAsStream("/nullables-with-defaults.json")) + val schema = Avro(AvroConfiguration(implicitNulls = true)).schema(Test.serializer()) + schema.toString(true) shouldBe expected.toString(true) + } +}) { + @Serializable + data class Test(val nullableString: String?, val nullableBoolean: Boolean?) 
+} diff --git a/src/test/resources/nullables-with-defaults.json b/src/test/resources/nullables-with-defaults.json new file mode 100644 index 00000000..c8499d37 --- /dev/null +++ b/src/test/resources/nullables-with-defaults.json @@ -0,0 +1,23 @@ +{ + "type": "record", + "name": "Test", + "namespace": "com.github.avrokotlin.avro4k.schema.NullableWithDefaultsSchemaTest", + "fields": [ + { + "name": "nullableString", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "nullableBoolean", + "type": [ + "null", + "boolean" + ], + "default": null + } + ] +}