From 362c7404e6c21ca4d04763dde82a7158a8113602 Mon Sep 17 00:00:00 2001 From: Miel Donkers Date: Tue, 3 Sep 2024 21:22:43 +0200 Subject: [PATCH] Some performance related changes to evaluate - Allow tuple type to prevent array creation overhead - Prevent hashing enums just for checking validity - Allow String column provider to take the name, so that it can use different settings --- go.mod | 1 + go.sum | 2 ++ lib/column/column_gen.go | 2 +- lib/column/column_gen_option.go | 6 +++--- lib/column/enum.go | 11 +++++++++-- lib/column/enum16.go | 16 ++++++++++++++-- lib/column/enum8.go | 16 ++++++++++++++-- lib/column/tuple.go | 21 +++++++++++++++++++++ 8 files changed, 65 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index fd9ff7a9c8..9578a38d37 100644 --- a/go.mod +++ b/go.mod @@ -70,6 +70,7 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.19.0 // indirect go.opentelemetry.io/otel/metric v1.26.0 // indirect golang.org/x/crypto v0.28.0 // indirect + golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect golang.org/x/sys v0.26.0 // indirect golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20240318140521-94a12d6c2237 // indirect diff --git a/go.sum b/go.sum index 808477a54a..5ca72a591d 100644 --- a/go.sum +++ b/go.sum @@ -194,6 +194,8 @@ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= diff --git a/lib/column/column_gen.go b/lib/column/column_gen.go index 5bd1180a3b..943bb047a5 100644 --- a/lib/column/column_gen.go +++ b/lib/column/column_gen.go @@ -136,7 +136,7 @@ func (t Type) Column(name string, tz *time.Location) (Interface, error) { case "Point": return &Point{name: name}, nil case "String": - return &String{name: name, col: colStrProvider()}, nil + return &String{name: name, col: colStrProvider(name)}, nil case "Object('json')": return &JSONObject{name: name, root: true, tz: tz}, nil } diff --git a/lib/column/column_gen_option.go b/lib/column/column_gen_option.go index 03f93694cc..6a883527b4 100644 --- a/lib/column/column_gen_option.go +++ b/lib/column/column_gen_option.go @@ -20,13 +20,13 @@ package column import "github.com/ClickHouse/ch-go/proto" // ColStrProvider defines provider of proto.ColStr -type ColStrProvider func() proto.ColStr +type ColStrProvider func(name string) proto.ColStr // colStrProvider provide proto.ColStr for Column() when type is String var colStrProvider ColStrProvider = defaultColStrProvider // defaultColStrProvider defines sample provider for proto.ColStr -func defaultColStrProvider() proto.ColStr { +func defaultColStrProvider(string) proto.ColStr { return proto.ColStr{} } @@ -35,7 +35,7 @@ func defaultColStrProvider() proto.ColStr { // // It is more suitable for scenarios where a lot of data is written in batches func WithAllocBufferColStrProvider(cap int) { - colStrProvider = func() proto.ColStr { + colStrProvider = func(string) proto.ColStr { return proto.ColStr{Buf: make([]byte, 0, cap)} } } diff --git a/lib/column/enum.go b/lib/column/enum.go index 25d2e2199d..a69a3de540 100644 --- a/lib/column/enum.go +++ b/lib/column/enum.go @@ -20,10 +20,11 @@ package column import ( "bytes" "errors" + "github.com/ClickHouse/ch-go/proto" + "golang.org/x/exp/maps" "math" + "slices" "strconv" - - "github.com/ClickHouse/ch-go/proto" ) func Enum(chType Type, name string) (Interface, error) { @@ -47,6 +48,9 @@ func Enum(chType Type, name string) (Interface, error) { enum.iv[values[i]] = proto.Enum8(v) enum.vi[proto.Enum8(v)] = values[i] } + enum.minEnum = int8(slices.Min(maps.Keys(enum.vi))) + enum.maxEnum = int8(slices.Max(maps.Keys(enum.vi))) + enum.continuous = (enum.maxEnum-enum.minEnum)+1 == int8(len(enum.vi)) return &enum, nil } enum := Enum16{ @@ -60,6 +64,9 @@ func Enum(chType Type, name string) (Interface, error) { enum.iv[values[i]] = proto.Enum16(indexes[i]) enum.vi[proto.Enum16(indexes[i])] = values[i] } + enum.minEnum = int16(slices.Min(maps.Keys(enum.vi))) + enum.maxEnum = int16(slices.Max(maps.Keys(enum.vi))) + enum.continuous = (enum.maxEnum-enum.minEnum)+1 == int16(len(enum.vi)) return &enum, nil } diff --git a/lib/column/enum16.go b/lib/column/enum16.go index c394e7fff3..d3a15b80d6 100644 --- a/lib/column/enum16.go +++ b/lib/column/enum16.go @@ -31,6 +31,10 @@ type Enum16 struct { chType Type col proto.ColEnum16 name string + + continuous bool + minEnum int16 + maxEnum int16 } func (col *Enum16) Reset() { @@ -179,9 +183,17 @@ func (col *Enum16) Append(v any) (nulls []uint8, err error) { func (col *Enum16) AppendRow(elem any) error { switch elem := elem.(type) { case int16: - return col.AppendRow(int(elem)) + if col.continuous && elem >= col.minEnum && elem <= col.maxEnum { + col.col.Append(proto.Enum16(elem)) + } else { + return col.AppendRow(int(elem)) + } case *int16: - return col.AppendRow(int(*elem)) + if col.continuous && *elem >= col.minEnum && *elem <= col.maxEnum { + col.col.Append(proto.Enum16(*elem)) + } else { + return col.AppendRow(int(*elem)) + } case int: v := proto.Enum16(elem) _, ok := col.vi[v] diff --git a/lib/column/enum8.go b/lib/column/enum8.go index 4aee561ad7..5417e5c9a3 100644 --- a/lib/column/enum8.go +++ b/lib/column/enum8.go @@ -31,6 +31,10 @@ type Enum8 struct { chType Type name string col proto.ColEnum8 + + continuous bool + minEnum int8 + maxEnum int8 } func (col *Enum8) Reset() { @@ -179,9 +183,17 @@ func (col *Enum8) Append(v any) (nulls []uint8, err error) { func (col *Enum8) AppendRow(elem any) error { switch elem := elem.(type) { case int8: - return col.AppendRow(int(elem)) + if col.continuous && elem >= col.minEnum && elem <= col.maxEnum { + col.col.Append(proto.Enum8(elem)) + } else { + return col.AppendRow(int(elem)) + } case *int8: - return col.AppendRow(int(*elem)) + if col.continuous && *elem >= col.minEnum && *elem <= col.maxEnum { + col.col.Append(proto.Enum8(*elem)) + } else { + return col.AppendRow(int(*elem)) + } case int: v := proto.Enum8(elem) _, ok := col.vi[v] diff --git a/lib/column/tuple.go b/lib/column/tuple.go index 95c4e24209..68dad31184 100644 --- a/lib/column/tuple.go +++ b/lib/column/tuple.go @@ -39,6 +39,10 @@ type Tuple struct { index map[string]int // map from col name to offset in columns } +type Tuple2 interface { + Get() (any, any) +} + func (col *Tuple) Reset() { for i := range col.columns { col.columns[i].Reset() @@ -566,6 +570,23 @@ func (col *Tuple) AppendRow(v any) error { return nil } + if tuple2, ok := v.(Tuple2); ok { + if 2 != len(col.columns) { + return &Error{ + ColumnType: string(col.chType), + Err: fmt.Errorf("invalid size. expected %d got %d", len(col.columns), 2), + } + } + elem1, elem2 := tuple2.Get() + if err := col.columns[0].AppendRow(elem1); err != nil { + return err + } + if err := col.columns[1].AppendRow(elem2); err != nil { + return err + } + return nil + } + if valuer, ok := v.(driver.Valuer); ok { val, err := valuer.Value() if err != nil {