Skip to content

Commit

Permalink
feat(schema): Implicit nulls (false by default)
Browse files Browse the repository at this point in the history
  • Loading branch information
Chuckame committed May 29, 2023
1 parent f6a7c0c commit b2d47f0
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 33 deletions.
25 changes: 24 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -415,10 +415,33 @@ Would result in the following schema:
}
```

### Nullable fields, optional fields and compatibility

#### TL;DR
To make your nullable fields optional (i.e. add `default: null` to every nullable field that has no other explicit default), so that they can later be removed without failing compatibility checks,
set `implicitNulls` to `true` in the configuration. Example:
```kotlin
Avro(AvroConfiguration(implicitNulls = true))
```

#### Longer story

With Avro, a field can be nullable, optional, or both, and these properties are taken into account for compatibility checking when using a schema registry.

However, if you remove a nullable field that is not optional, the change may be rejected, depending on the compatibility mode, because the field has no default value.

- What is an optional field?
> An optional field is a field that has a *default* value, like an int with a default of `-1`.
- What is a nullable field?
> A nullable field is a field that contains a `null` type in its type union, but **it is not an optional field unless its `default` value is set to `null`**.
So to mark a nullable field as optional and ease Avro contract evolution with respect to compatibility checks, set its `default` to `null`.


## Types

Avro4s supports the Avro logical types out of the box as well as other common JDK types.
Avro4k supports the Avro logical types out of the box as well as other common JDK types.

Avro has no understanding of Kotlin types, or anything outside of its built-in set of supported types, so all values must be converted to something that is compatible with Avro.

Expand Down
14 changes: 11 additions & 3 deletions src/main/kotlin/com/github/avrokotlin/avro4k/Avro.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@ package com.github.avrokotlin.avro4k

import com.github.avrokotlin.avro4k.decoder.RootRecordDecoder
import com.github.avrokotlin.avro4k.encoder.RootRecordEncoder
import com.github.avrokotlin.avro4k.io.*
import com.github.avrokotlin.avro4k.io.AvroDecodeFormat
import com.github.avrokotlin.avro4k.io.AvroEncodeFormat
import com.github.avrokotlin.avro4k.io.AvroFormat
import com.github.avrokotlin.avro4k.io.AvroInputStream
import com.github.avrokotlin.avro4k.io.AvroOutputStream
import com.github.avrokotlin.avro4k.schema.schemaFor
import com.github.avrokotlin.avro4k.serializer.UUIDSerializer
import kotlinx.serialization.*
import kotlinx.serialization.BinaryFormat
import kotlinx.serialization.DeserializationStrategy
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerialFormat
import kotlinx.serialization.SerializationStrategy
import kotlinx.serialization.descriptors.SerialDescriptor
import kotlinx.serialization.modules.SerializersModule
import kotlinx.serialization.modules.contextual
Expand Down Expand Up @@ -259,7 +267,7 @@ class Avro(
serializersModule,
descriptor,
descriptor.annotations,
configuration.namingStrategy,
configuration,
mutableMapOf()
).schema()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@ package com.github.avrokotlin.avro4k
import com.github.avrokotlin.avro4k.schema.DefaultNamingStrategy
import com.github.avrokotlin.avro4k.schema.NamingStrategy

data class AvroConfiguration(val namingStrategy: NamingStrategy = DefaultNamingStrategy)
data class AvroConfiguration(
val namingStrategy: NamingStrategy = DefaultNamingStrategy,
val implicitNulls: Boolean = false,
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.github.avrokotlin.avro4k.schema

import com.github.avrokotlin.avro4k.AnnotationExtractor
import com.github.avrokotlin.avro4k.Avro
import com.github.avrokotlin.avro4k.AvroConfiguration
import com.github.avrokotlin.avro4k.AvroProp
import com.github.avrokotlin.avro4k.RecordNaming
import kotlinx.serialization.ExperimentalSerializationApi
Expand All @@ -22,7 +23,7 @@ import org.apache.avro.SchemaBuilder
@ExperimentalSerializationApi
class ClassSchemaFor(
private val descriptor: SerialDescriptor,
private val namingStrategy: NamingStrategy,
private val configuration: AvroConfiguration,
private val serializersModule: SerializersModule,
private val resolvedSchemas: MutableMap<RecordNaming, Schema>
) : SchemaFor {
Expand Down Expand Up @@ -74,12 +75,12 @@ class ClassSchemaFor(
val fieldDescriptor = descriptor.getElementDescriptor(index)
val annos = AnnotationExtractor(descriptor.getElementAnnotations(
index))
val fieldNaming = RecordNaming(descriptor, index, namingStrategy)
val fieldNaming = RecordNaming(descriptor, index, configuration.namingStrategy)
val schema = schemaFor(
serializersModule,
fieldDescriptor,
descriptor.getElementAnnotations(index),
namingStrategy,
configuration,
resolvedSchemas
).schema()

Expand All @@ -89,7 +90,7 @@ class ClassSchemaFor(
val (size, name) = when (val a = annos.fixed()) {
null -> {
val fieldAnnos = AnnotationExtractor(fieldDescriptor.annotations)
val n = RecordNaming(fieldDescriptor, namingStrategy)
val n = RecordNaming(fieldDescriptor, configuration.namingStrategy)
when (val b = fieldAnnos.fixed()) {
null -> 0 to n.name
else -> b to n.name
Expand All @@ -115,18 +116,20 @@ class ClassSchemaFor(
else -> schemaOrFixed.overrideNamespace(ns)
}

val default: Any? = annos.default()?.let {
val default: Any? = annos.default()?.let { annotationDefaultValue ->
when {
it == Avro.NULL -> Schema.Field.NULL_DEFAULT_VALUE
annotationDefaultValue == Avro.NULL -> Schema.Field.NULL_DEFAULT_VALUE
schemaWithResolvedNamespace.extractNonNull().type in listOf(
Schema.Type.FIXED,
Schema.Type.BYTES,
Schema.Type.STRING,
Schema.Type.ENUM
) -> it
else -> json.parseToJsonElement(it).convertToAvroDefault()
) -> annotationDefaultValue
else -> json.parseToJsonElement(annotationDefaultValue).convertToAvroDefault()
}
}
} ?: if (configuration.implicitNulls && fieldDescriptor.isNullable) {
Schema.Field.NULL_DEFAULT_VALUE
} else null

val field = Schema.Field(fieldNaming.name, schemaWithResolvedNamespace, annos.doc(), default)
val props = this.descriptor.getElementAnnotations(index).filterIsInstance<AvroProp>()
Expand Down
45 changes: 28 additions & 17 deletions src/main/kotlin/com/github/avrokotlin/avro4k/schema/SchemaFor.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.github.avrokotlin.avro4k.schema

import com.github.avrokotlin.avro4k.AnnotationExtractor
import com.github.avrokotlin.avro4k.Avro
import com.github.avrokotlin.avro4k.AvroConfiguration
import com.github.avrokotlin.avro4k.RecordNaming
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.InternalSerializationApi
Expand Down Expand Up @@ -64,7 +65,7 @@ class EnumSchemaFor(

@ExperimentalSerializationApi
class PairSchemaFor(private val descriptor: SerialDescriptor,
private val namingStrategy: NamingStrategy,
private val configuration: AvroConfiguration,
private val serializersModule: SerializersModule,
private val resolvedSchemas: MutableMap<RecordNaming, Schema>
) : SchemaFor {
Expand All @@ -74,14 +75,14 @@ class PairSchemaFor(private val descriptor: SerialDescriptor,
serializersModule,
descriptor.getElementDescriptor(0),
descriptor.getElementAnnotations(0),
namingStrategy,
configuration,
resolvedSchemas
)
val b = schemaFor(
serializersModule,
descriptor.getElementDescriptor(1),
descriptor.getElementAnnotations(1),
namingStrategy,
configuration,
resolvedSchemas
)
return SchemaBuilder.unionOf()
Expand All @@ -91,10 +92,11 @@ class PairSchemaFor(private val descriptor: SerialDescriptor,
.endUnion()
}
}

@ExperimentalSerializationApi
class ListSchemaFor(private val descriptor: SerialDescriptor,
private val serializersModule: SerializersModule,
private val namingStrategy: NamingStrategy,
private val configuration: AvroConfiguration,
private val resolvedSchemas: MutableMap<RecordNaming, Schema>
) : SchemaFor {

Expand All @@ -107,18 +109,19 @@ class ListSchemaFor(private val descriptor: SerialDescriptor,
val elementSchema = schemaFor(serializersModule,
elementType,
descriptor.getElementAnnotations(0),
namingStrategy,
configuration,
resolvedSchemas
).schema()
return Schema.createArray(elementSchema)
}
}
}
}

@ExperimentalSerializationApi
class MapSchemaFor(private val descriptor: SerialDescriptor,
private val serializersModule: SerializersModule,
private val namingStrategy: NamingStrategy,
private val configuration: AvroConfiguration,
private val resolvedSchemas: MutableMap<RecordNaming, Schema>
) : SchemaFor {

Expand All @@ -131,25 +134,31 @@ class MapSchemaFor(private val descriptor: SerialDescriptor,
serializersModule,
valueType,
descriptor.getElementAnnotations(1),
namingStrategy,
configuration,
resolvedSchemas
).schema()
return Schema.createMap(valueSchema)
}

else -> throw RuntimeException("Avro only supports STRING as the key type in a MAP")
}
}
}

@ExperimentalSerializationApi
class NullableSchemaFor(private val schemaFor: SchemaFor, private val annotations : List<Annotation>) : SchemaFor {
class NullableSchemaFor(
private val schemaFor: SchemaFor,
private val annotations: List<Annotation>,
) : SchemaFor {

private val nullFirst by lazy{
private val nullFirst by lazy {
//The default value can only be of the first type in the union definition.
//Therefore we have to check the default value in order to decide the order of types within the union.
//If no default is set, or if the default value is of type "null", nulls will be first.
val default = AnnotationExtractor(annotations).default()
default == null || default == Avro.NULL
}

override fun schema(): Schema {
val elementSchema = schemaFor.schema()
val nullSchema = SchemaBuilder.builder().nullType()
Expand All @@ -162,7 +171,7 @@ class NullableSchemaFor(private val schemaFor: SchemaFor, private val annotation
fun schemaFor(serializersModule: SerializersModule,
descriptor: SerialDescriptor,
annos: List<Annotation>,
namingStrategy: NamingStrategy,
configuration: AvroConfiguration,
resolvedSchemas: MutableMap<RecordNaming, Schema>
): SchemaFor {

Expand All @@ -173,7 +182,7 @@ fun schemaFor(serializersModule: SerializersModule,
} else descriptor

val schemaFor: SchemaFor = when (underlying) {
is AvroDescriptor -> SchemaFor.const(underlying.schema(annos, serializersModule, namingStrategy))
is AvroDescriptor -> SchemaFor.const(underlying.schema(annos, serializersModule, configuration.namingStrategy))
else -> when (descriptor.unwrapValueClass.kind) {
PrimitiveKind.STRING -> SchemaFor.StringSchemaFor
PrimitiveKind.LONG -> SchemaFor.LongSchemaFor
Expand All @@ -193,16 +202,18 @@ fun schemaFor(serializersModule: SerializersModule,
"Contextual or default serializer not found for $descriptor "
},
annos,
namingStrategy,
configuration,
resolvedSchemas
)

StructureKind.CLASS, StructureKind.OBJECT -> when (descriptor.serialName) {
"kotlin.Pair" -> PairSchemaFor(descriptor, namingStrategy, serializersModule, resolvedSchemas)
else -> ClassSchemaFor(descriptor, namingStrategy, serializersModule, resolvedSchemas)
"kotlin.Pair" -> PairSchemaFor(descriptor, configuration, serializersModule, resolvedSchemas)
else -> ClassSchemaFor(descriptor, configuration, serializersModule, resolvedSchemas)
}
StructureKind.LIST -> ListSchemaFor(descriptor, serializersModule, namingStrategy, resolvedSchemas)
StructureKind.MAP -> MapSchemaFor(descriptor, serializersModule, namingStrategy, resolvedSchemas)
is PolymorphicKind -> UnionSchemaFor(descriptor, namingStrategy, serializersModule, resolvedSchemas)

StructureKind.LIST -> ListSchemaFor(descriptor, serializersModule, configuration, resolvedSchemas)
StructureKind.MAP -> MapSchemaFor(descriptor, serializersModule, configuration, resolvedSchemas)
is PolymorphicKind -> UnionSchemaFor(descriptor, configuration, serializersModule, resolvedSchemas)
else -> throw SerializationException("Unsupported type ${descriptor.serialName} of ${descriptor.kind}")
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.github.avrokotlin.avro4k.schema

import com.github.avrokotlin.avro4k.AvroConfiguration
import com.github.avrokotlin.avro4k.RecordNaming
import com.github.avrokotlin.avro4k.possibleSerializationSubclasses
import kotlinx.serialization.ExperimentalSerializationApi
Expand All @@ -10,7 +11,7 @@ import org.apache.avro.Schema
@ExperimentalSerializationApi
class UnionSchemaFor(
private val descriptor: SerialDescriptor,
private val namingStrategy: NamingStrategy,
private val configuration: AvroConfiguration,
private val serializersModule: SerializersModule,
private val resolvedSchemas: MutableMap<RecordNaming, Schema>
) : SchemaFor {
Expand All @@ -19,7 +20,7 @@ class UnionSchemaFor(
descriptor.possibleSerializationSubclasses(serializersModule).sortedBy { it.serialName }
return Schema.createUnion(
leafSerialDescriptors.map {
ClassSchemaFor(it, namingStrategy, serializersModule, resolvedSchemas).schema()
ClassSchemaFor(it, configuration, serializersModule, resolvedSchemas).schema()
}
)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.github.avrokotlin.avro4k.schema

import com.github.avrokotlin.avro4k.Avro
import com.github.avrokotlin.avro4k.AvroConfiguration
import io.kotest.core.spec.style.FunSpec
import io.kotest.matchers.shouldBe
import kotlinx.serialization.Serializable

/**
 * Verifies the schema generated for nullable properties when the
 * `implicitNulls` configuration flag is switched on.
 */
class NullableWithDefaultsSchemaTest : FunSpec({
    test("generate null as Union[T, Null] with default null") {
        // Reference schema, loaded from the test resources.
        val parser = org.apache.avro.Schema.Parser()
        val resource = javaClass.getResourceAsStream("/nullables-with-defaults.json")
        val expectedSchema = parser.parse(resource)

        // Schema produced by an Avro instance configured with implicitNulls = true.
        val avro = Avro(AvroConfiguration(implicitNulls = true))
        val generatedSchema = avro.schema(Test.serializer())

        // Both sides are compared through their pretty-printed JSON form.
        generatedSchema.toString(true) shouldBe expectedSchema.toString(true)
    }
}) {
    /** Fixture with two nullable properties and no explicit defaults. */
    @Serializable
    data class Test(val nullableString: String?, val nullableBoolean: Boolean?)
}
23 changes: 23 additions & 0 deletions src/test/resources/nullables-with-defaults.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"type": "record",
"name": "Test",
"namespace": "com.github.avrokotlin.avro4k.schema.NullableWithDefaultsSchemaTest",
"fields": [
{
"name": "nullableString",
"type": [
"null",
"string"
],
"default": null
},
{
"name": "nullableBoolean",
"type": [
"null",
"boolean"
],
"default": null
}
]
}

0 comments on commit b2d47f0

Please sign in to comment.