From a7853db2843feb54a4aea82e0c6fc2922f296e91 Mon Sep 17 00:00:00 2001 From: Michel Davit Date: Mon, 22 Jul 2024 10:11:07 +0200 Subject: [PATCH 1/3] [avro] Use SchemaBuilder API Move implementation to the SchemaBuilder API. This removes the cluncky JSON conversion type required for default values --- .../main/scala/magnolify/avro/AvroType.scala | 101 +++++++++--------- 1 file changed, 49 insertions(+), 52 deletions(-) diff --git a/avro/src/main/scala/magnolify/avro/AvroType.scala b/avro/src/main/scala/magnolify/avro/AvroType.scala index 7ede81ff..b89144c7 100644 --- a/avro/src/main/scala/magnolify/avro/AvroType.scala +++ b/avro/src/main/scala/magnolify/avro/AvroType.scala @@ -21,7 +21,7 @@ import magnolify.shared.{doc => _, _} import magnolify.shims.FactoryCompat import org.apache.avro.generic.GenericData.EnumSymbol import org.apache.avro.generic._ -import org.apache.avro.{JsonProperties, LogicalType, LogicalTypes, Schema} +import org.apache.avro.{LogicalType, LogicalTypes, Schema, SchemaBuilder} import org.joda.{time => joda} import java.nio.{ByteBuffer, ByteOrder} @@ -105,34 +105,26 @@ object AvroField { } } else { new Record[T] { - override protected def buildSchema(cm: CaseMapper): Schema = Schema - .createRecord( - caseClass.typeName.short, - getDoc(caseClass.annotations, caseClass.typeName.full), - caseClass.typeName.owner, - false, - caseClass.parameters.map { p => - new Schema.Field( - cm.map(p.label), - p.typeclass.schema(cm), - getDoc(p.annotations, s"${caseClass.typeName.full}#${p.label}"), - p.default - .map(d => p.typeclass.makeDefault(d)(cm)) - .getOrElse(p.typeclass.fallbackDefault) - ) - }.asJava - ) - - // `JacksonUtils.toJson` expects `Map[String, Any]` for `RECORD` defaults - override def makeDefault(d: T)(cm: CaseMapper): ju.Map[String, Any] = { + override protected def buildSchema(cm: CaseMapper): Schema = { + val builder = SchemaBuilder + .record(caseClass.typeName.short) + .namespace(caseClass.typeName.owner) + .doc(getDoc(caseClass.annotations, caseClass.typeName.full)) + .fields() + caseClass.parameters - .map { p => - val name = cm.map(p.label) - val value = p.typeclass.makeDefault(p.dereference(d))(cm) - name -> value + .foldLeft(builder) { (b, p) => + val f = b + .name(cm.map(p.label)) + .doc(getDoc(p.annotations, s"${caseClass.typeName.full}#${p.label}")) + .`type`(p.typeclass.schema(cm)) + + p.default match { + case Some(d) => f.withDefault(p.typeclass.makeDefault(d)(cm)) + case None => f.noDefault() + } } - .toMap - .asJava + .endRecord() } override def from(v: GenericRecord)(cm: CaseMapper): T = @@ -198,9 +190,7 @@ object AvroField { implicit val afFloat: AvroField[Float] = id[Float](Schema.Type.FLOAT) implicit val afDouble: AvroField[Double] = id[Double](Schema.Type.DOUBLE) implicit val afByteBuffer: AvroField[ByteBuffer] = new Aux[ByteBuffer, ByteBuffer, ByteBuffer] { - override protected def buildSchema(cm: CaseMapper): Schema = Schema.create(Schema.Type.BYTES) - // `JacksonUtils.toJson` expects `Array[Byte]` for `BYTES` defaults - override def makeDefault(d: ByteBuffer)(cm: CaseMapper): Array[Byte] = d.array() + override protected def buildSchema(cm: CaseMapper): Schema = SchemaBuilder.builder().bytesType() // copy to avoid issue in case original buffer is reused override def from(v: ByteBuffer)(cm: CaseMapper): ByteBuffer = { val ptr = v.asReadOnlyBuffer() @@ -213,11 +203,12 @@ object AvroField { implicit val afCharSequence: AvroField[CharSequence] = id[CharSequence](Schema.Type.STRING) implicit val afString: AvroField[String] = new Aux[String, String, String] { - override protected def buildSchema(cm: CaseMapper): Schema = { - val schema = Schema.create(Schema.Type.STRING) - GenericData.setStringType(schema, GenericData.StringType.String) - schema - } + override protected def buildSchema(cm: CaseMapper): Schema = + SchemaBuilder + .builder() + .stringBuilder() + .prop(GenericData.STRING_PROP, GenericData.StringType.String) + .endString() override def from(v: String)(cm: CaseMapper): String = v override def to(v: String)(cm: CaseMapper): String = v } @@ -226,12 +217,12 @@ object AvroField { // Avro 1.9+ added a type parameter for `GenericEnumSymbol`, breaking 1.8 compatibility // Some reader, i.e. `AvroParquetReader` reads enums as `Utf8` new Aux[T, AnyRef, EnumSymbol] { - override protected def buildSchema(cm: CaseMapper): Schema = { - val doc = getDoc(et.annotations, s"Enum ${et.namespace}.${et.name}") - Schema.createEnum(et.name, doc, et.namespace, et.values.asJava) - } - // `JacksonUtils.toJson` expects `String` for `ENUM` defaults - override def makeDefault(d: T)(cm: CaseMapper): String = et.to(d) + override protected def buildSchema(cm: CaseMapper): Schema = + SchemaBuilder + .enumeration(et.name) + .doc(getDoc(et.annotations, s"Enum ${et.namespace}.${et.name}")) + .namespace(et.namespace) + .symbols(et.values: _*) override def from(v: FromT)(cm: CaseMapper): T = et.from(v.toString) override def to(v: T)(cm: CaseMapper): ToT = new GenericData.EnumSymbol(schema(cm), v) } @@ -239,11 +230,11 @@ object AvroField { implicit def afOption[T](implicit f: AvroField[T]): AvroField[Option[T]] = new Aux[Option[T], f.FromT, f.ToT] { override protected def buildSchema(cm: CaseMapper): Schema = - Schema.createUnion(Schema.create(Schema.Type.NULL), f.schema(cm)) + SchemaBuilder.unionOf().nullType.and().`type`(f.schema(cm)).endUnion() // `Option[T]` is a `UNION` of `[NULL, T]` and must default to first type `NULL` - override def makeDefault(d: Option[T])(cm: CaseMapper): JsonProperties.Null = { + override def makeDefault(d: Option[T])(cm: CaseMapper): Null = { require(d.isEmpty, "Option[T] can only default to None") - JsonProperties.NULL_VALUE + null } override def from(v: f.FromT)(cm: CaseMapper): Option[T] = if (v == null) None else Some(f.from(v)(cm)) @@ -259,7 +250,8 @@ object AvroField { fc: FactoryCompat[T, C[T]] ): AvroField[C[T]] = new Aux[C[T], ju.List[f.FromT], GenericArray[f.ToT]] { - override protected def buildSchema(cm: CaseMapper): Schema = Schema.createArray(f.schema(cm)) + override protected def buildSchema(cm: CaseMapper): Schema = + SchemaBuilder.array().items(f.schema(cm)) override def fallbackDefault: ju.List[f.ToT] = ju.Collections.emptyList() override def from(v: ju.List[f.FromT])(cm: CaseMapper): C[T] = fc.fromSpecific(v.asScala.iterator.map(p => f.from(p)(cm))) @@ -269,7 +261,8 @@ object AvroField { implicit def afCharSequenceMap[T](implicit f: AvroField[T]): AvroField[Map[CharSequence, T]] = new Aux[Map[CharSequence, T], ju.Map[CharSequence, f.FromT], ju.Map[CharSequence, f.ToT]] { - override protected def buildSchema(cm: CaseMapper): Schema = Schema.createMap(f.schema(cm)) + override protected def buildSchema(cm: CaseMapper): Schema = + SchemaBuilder.map().values(f.schema(cm)) override def fallbackDefault: ju.Map[CharSequence, f.ToT] = ju.Collections.emptyMap() override def from(v: ju.Map[CharSequence, f.FromT])(cm: CaseMapper): Map[CharSequence, T] = v.asScala.map { case (k, v) => k -> f.from(v)(cm) }.toMap @@ -279,11 +272,11 @@ object AvroField { implicit def afStringMap[T](implicit f: AvroField[T]): AvroField[Map[String, T]] = new Aux[Map[String, T], ju.Map[String, f.FromT], ju.Map[String, f.ToT]] { - override protected def buildSchema(cm: CaseMapper): Schema = { - val schema = Schema.createMap(f.schema(cm)) - GenericData.setStringType(schema, GenericData.StringType.String) - schema - } + override protected def buildSchema(cm: CaseMapper): Schema = + SchemaBuilder + .map() + .prop(GenericData.STRING_PROP, GenericData.StringType.String) + .values(f.schema(cm)) override def fallbackDefault: ju.Map[String, f.ToT] = ju.Collections.emptyMap() override def from(v: ju.Map[String, f.FromT])(cm: CaseMapper): Map[String, T] = v.asScala.map { case (k, v) => k -> f.from(v)(cm) }.toMap @@ -361,7 +354,11 @@ object AvroField { override protected def buildSchema(cm: CaseMapper): Schema = { val n = ReflectionUtils.name[T] val ns = ReflectionUtils.namespace[T] - Schema.createFixed(n, getDoc(an.annotations, n), ns, size) + SchemaBuilder + .fixed(n) + .namespace(ns) + .doc(getDoc(an.annotations, n)) + .size(size) } override def from(v: GenericFixed)(cm: CaseMapper): T = f(v.bytes()) From a39c2709130d1618c62b29f356f36f9ce70a30dc Mon Sep 17 00:00:00 2001 From: Michel Davit Date: Mon, 22 Jul 2024 10:30:02 +0200 Subject: [PATCH 2/3] Use setStringType helper for schema --- .../main/scala/magnolify/avro/AvroType.scala | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/avro/src/main/scala/magnolify/avro/AvroType.scala b/avro/src/main/scala/magnolify/avro/AvroType.scala index b89144c7..d9e8eaf6 100644 --- a/avro/src/main/scala/magnolify/avro/AvroType.scala +++ b/avro/src/main/scala/magnolify/avro/AvroType.scala @@ -203,12 +203,11 @@ object AvroField { implicit val afCharSequence: AvroField[CharSequence] = id[CharSequence](Schema.Type.STRING) implicit val afString: AvroField[String] = new Aux[String, String, String] { - override protected def buildSchema(cm: CaseMapper): Schema = - SchemaBuilder - .builder() - .stringBuilder() - .prop(GenericData.STRING_PROP, GenericData.StringType.String) - .endString() + override protected def buildSchema(cm: CaseMapper): Schema = { + val schema = SchemaBuilder.builder().stringType() + GenericData.setStringType(schema, GenericData.StringType.String) + schema + } override def from(v: String)(cm: CaseMapper): String = v override def to(v: String)(cm: CaseMapper): String = v } @@ -272,11 +271,11 @@ object AvroField { implicit def afStringMap[T](implicit f: AvroField[T]): AvroField[Map[String, T]] = new Aux[Map[String, T], ju.Map[String, f.FromT], ju.Map[String, f.ToT]] { - override protected def buildSchema(cm: CaseMapper): Schema = - SchemaBuilder - .map() - .prop(GenericData.STRING_PROP, GenericData.StringType.String) - .values(f.schema(cm)) + override protected def buildSchema(cm: CaseMapper): Schema = { + val schema = SchemaBuilder.map().values(f.schema(cm)) + GenericData.setStringType(schema, GenericData.StringType.String) + schema + } override def fallbackDefault: ju.Map[String, f.ToT] = ju.Collections.emptyMap() override def from(v: ju.Map[String, f.FromT])(cm: CaseMapper): Map[String, T] = v.asScala.map { case (k, v) => k -> f.from(v)(cm) }.toMap From 523cf2d8f9c4f4fd6c858b2b2ad3a6705e15fdaa Mon Sep 17 00:00:00 2001 From: Michel Davit Date: Mon, 22 Jul 2024 11:53:56 +0200 Subject: [PATCH 3/3] Test legacy avro with 1.9 --- .github/workflows/ci.yml | 2 +- build.sbt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cff0905a..7aff6425 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -338,7 +338,7 @@ jobs: - name: Test env: - JAVA_OPTS: '-Davro.version=1.8.2' + JAVA_OPTS: '-Davro.version=1.9.2' run: sbt '++ ${{ matrix.scala }}' avro/test site: diff --git a/build.sbt b/build.sbt index d1d17197..263f3b09 100644 --- a/build.sbt +++ b/build.sbt @@ -183,7 +183,7 @@ ThisBuild / githubWorkflowAddedJobs ++= Seq( List( WorkflowStep.Sbt( List("avro/test"), - env = Map("JAVA_OPTS" -> "-Davro.version=1.8.2"), + env = Map("JAVA_OPTS" -> "-Davro.version=1.9.2"), name = Some("Test") ) ),