From 3e0df2f6c30a71fbea66abdafd7c393ab2b0ac04 Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Tue, 7 Feb 2023 17:47:19 -0500 Subject: [PATCH 1/2] collatedstring: create new package Move the small amount of code from tree/collatedstring.go Release note: None --- pkg/BUILD.bazel | 2 ++ pkg/sql/BUILD.bazel | 1 + pkg/sql/information_schema.go | 5 +++-- pkg/sql/pg_catalog.go | 7 ++++--- pkg/sql/sem/tree/BUILD.bazel | 2 +- pkg/sql/sem/tree/create.go | 3 ++- pkg/sql/sem/tree/type_check.go | 3 ++- pkg/util/collatedstring/BUILD.bazel | 12 ++++++++++++ .../tree => util/collatedstring}/collatedstring.go | 4 ++-- 9 files changed, 29 insertions(+), 10 deletions(-) create mode 100644 pkg/util/collatedstring/BUILD.bazel rename pkg/{sql/sem/tree => util/collatedstring}/collatedstring.go (91%) diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index b88940aba579..fff5e5d0d126 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -1915,6 +1915,7 @@ GO_TARGETS = [ "//pkg/util/circuit:circuit_test", "//pkg/util/cloudinfo:cloudinfo", "//pkg/util/cloudinfo:cloudinfo_test", + "//pkg/util/collatedstring:collatedstring", "//pkg/util/contextutil:contextutil", "//pkg/util/contextutil:contextutil_test", "//pkg/util/ctxgroup:ctxgroup", @@ -2931,6 +2932,7 @@ GET_X_DATA_TARGETS = [ "//pkg/util/cgroups:get_x_data", "//pkg/util/circuit:get_x_data", "//pkg/util/cloudinfo:get_x_data", + "//pkg/util/collatedstring:get_x_data", "//pkg/util/contextutil:get_x_data", "//pkg/util/ctxgroup:get_x_data", "//pkg/util/duration:get_x_data", diff --git a/pkg/sql/BUILD.bazel b/pkg/sql/BUILD.bazel index d981cbeed312..7539f1d6ec20 100644 --- a/pkg/sql/BUILD.bazel +++ b/pkg/sql/BUILD.bazel @@ -463,6 +463,7 @@ go_library( "//pkg/util/buildutil", "//pkg/util/cache", "//pkg/util/cancelchecker", + "//pkg/util/collatedstring", "//pkg/util/contextutil", "//pkg/util/ctxgroup", "//pkg/util/duration", diff --git a/pkg/sql/information_schema.go b/pkg/sql/information_schema.go index aa837d9b752a..afea4b74a284 100644 --- a/pkg/sql/information_schema.go +++ b/pkg/sql/information_schema.go @@ -39,6 +39,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sqlutil" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/sql/vtable" + "github.com/cockroachdb/cockroach/pkg/util/collatedstring" "github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate" "github.com/cockroachdb/errors" "github.com/lib/pq/oid" @@ -1528,7 +1529,7 @@ https://www.postgresql.org/docs/current/infoschema-collations.html`, tree.NewDString("NO PAD"), ) } - if err := add(tree.DefaultCollationTag); err != nil { + if err := add(collatedstring.DefaultCollationTag); err != nil { return err } for _, tag := range collate.Supported() { @@ -1561,7 +1562,7 @@ https://www.postgresql.org/docs/current/infoschema-collation-character-set-appli tree.NewDString("UTF8"), // character_set_name: UTF8 is the only available encoding ) } - if err := add(tree.DefaultCollationTag); err != nil { + if err := add(collatedstring.DefaultCollationTag); err != nil { return err } for _, tag := range collate.Supported() { diff --git a/pkg/sql/pg_catalog.go b/pkg/sql/pg_catalog.go index 616d7dcda65b..ffdbe8f2df36 100644 --- a/pkg/sql/pg_catalog.go +++ b/pkg/sql/pg_catalog.go @@ -48,6 +48,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sqlerrors" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/sql/vtable" + "github.com/cockroachdb/cockroach/pkg/util/collatedstring" "github.com/cockroachdb/cockroach/pkg/util/duration" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/timeutil" @@ -809,7 +810,7 @@ https://www.postgresql.org/docs/9.5/catalog-pg-collation.html`, tree.DNull, // collisdeterministic ) } - if err := add(tree.DefaultCollationTag); err != nil { + if err := add(collatedstring.DefaultCollationTag); err != nil { return err } for _, tag := range collate.Supported() { @@ -4271,13 +4272,13 @@ func typColl(typ *types.T, h oidHasher) tree.Datum { case types.AnyFamily: return oidZero case types.StringFamily: - return h.CollationOid(tree.DefaultCollationTag) + return h.CollationOid(collatedstring.DefaultCollationTag) case types.CollatedStringFamily: return h.CollationOid(typ.Locale()) } if typ.Equivalent(types.StringArray) { - return h.CollationOid(tree.DefaultCollationTag) + return h.CollationOid(collatedstring.DefaultCollationTag) } return oidZero } diff --git a/pkg/sql/sem/tree/BUILD.bazel b/pkg/sql/sem/tree/BUILD.bazel index a0893db3d7f6..34c2fe572be3 100644 --- a/pkg/sql/sem/tree/BUILD.bazel +++ b/pkg/sql/sem/tree/BUILD.bazel @@ -23,7 +23,6 @@ go_library( "backup.go", "changefeed.go", "col_name.go", - "collatedstring.go", "comment_on_column.go", "comment_on_constraint.go", "comment_on_database.go", @@ -140,6 +139,7 @@ go_library( "//pkg/util", "//pkg/util/bitarray", "//pkg/util/cache", + "//pkg/util/collatedstring", "//pkg/util/duration", "//pkg/util/encoding", "//pkg/util/errorutil/unimplemented", diff --git a/pkg/sql/sem/tree/create.go b/pkg/sql/sem/tree/create.go index 4a01a6ab1672..33119531d0ae 100644 --- a/pkg/sql/sem/tree/create.go +++ b/pkg/sql/sem/tree/create.go @@ -29,6 +29,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode" "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror" "github.com/cockroachdb/cockroach/pkg/sql/types" + "github.com/cockroachdb/cockroach/pkg/util/collatedstring" "github.com/cockroachdb/cockroach/pkg/util/pretty" "github.com/cockroachdb/errors" "golang.org/x/text/language" @@ -544,7 +545,7 @@ func NewColumnTableDef( // In CRDB, collated strings are treated separately to string family types. // To most behave like postgres, set the CollatedString type if a non-"default" // collation is used. - if locale != DefaultCollationTag { + if locale != collatedstring.DefaultCollationTag { _, err := language.Parse(locale) if err != nil { return nil, pgerror.Wrapf(err, pgcode.Syntax, "invalid locale %s", locale) diff --git a/pkg/sql/sem/tree/type_check.go b/pkg/sql/sem/tree/type_check.go index 95cad91bf86e..5f7f8c18e334 100644 --- a/pkg/sql/sem/tree/type_check.go +++ b/pkg/sql/sem/tree/type_check.go @@ -22,6 +22,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/volatility" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util" + "github.com/cockroachdb/cockroach/pkg/util/collatedstring" "github.com/cockroachdb/cockroach/pkg/util/duration" "github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented" "github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate" @@ -698,7 +699,7 @@ func (expr *AnnotateTypeExpr) TypeCheck( func (expr *CollateExpr) TypeCheck( ctx context.Context, semaCtx *SemaContext, desired *types.T, ) (TypedExpr, error) { - if strings.ToLower(expr.Locale) == DefaultCollationTag { + if strings.ToLower(expr.Locale) == collatedstring.DefaultCollationTag { return nil, errors.WithHint( unimplemented.NewWithIssuef( 57255, diff --git a/pkg/util/collatedstring/BUILD.bazel b/pkg/util/collatedstring/BUILD.bazel new file mode 100644 index 000000000000..db73d047be00 --- /dev/null +++ b/pkg/util/collatedstring/BUILD.bazel @@ -0,0 +1,12 @@ +load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data") +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "collatedstring", + srcs = ["collatedstring.go"], + importpath = "github.com/cockroachdb/cockroach/pkg/util/collatedstring", + visibility = ["//visibility:public"], + deps = ["@org_golang_x_text//collate"], +) + +get_x_data(name = "get_x_data") diff --git a/pkg/sql/sem/tree/collatedstring.go b/pkg/util/collatedstring/collatedstring.go similarity index 91% rename from pkg/sql/sem/tree/collatedstring.go rename to pkg/util/collatedstring/collatedstring.go index 230d7cea7398..72c58cf05845 100644 --- a/pkg/sql/sem/tree/collatedstring.go +++ b/pkg/util/collatedstring/collatedstring.go @@ -1,4 +1,4 @@ -// Copyright 2020 The Cockroach Authors. +// Copyright 2023 The Cockroach Authors. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -package tree +package collatedstring import "golang.org/x/text/collate" From 8c1e1bea501832e97a28b3fcdcccc7aaf6025c8b Mon Sep 17 00:00:00 2001 From: Rafi Shamim Date: Tue, 7 Feb 2023 21:58:15 -0500 Subject: [PATCH 2/2] collatedstring: support default, C, and POSIX in expressions Release note (sql change): Expressions of the form `COLLATE "default"`, `COLLATE "C"`, and `COLLATE "POSIX"` are now supported. Since the default collation cannot be changed currently, these expressions are all equivalent. The expressions are evaluated by treating the input as a normal string, and ignoring the collation. This means that comparisons between strings and collated strings that use "default", "C", or "POSIX" are now supported. Creating a column with the "C" or "POSIX" collations is still not supported. --- pkg/sql/BUILD.bazel | 1 - pkg/sql/information_schema.go | 17 +++------- .../testdata/logic_test/collatedstring | 27 +++++++++++++-- .../testdata/logic_test/information_schema | 4 +++ pkg/sql/pg_catalog.go | 9 ++--- pkg/sql/sem/tree/datum.go | 2 +- pkg/sql/sem/tree/type_check.go | 21 ++++-------- pkg/testutils/lint/lint_test.go | 34 +++++++++++++++++++ pkg/util/collatedstring/collatedstring.go | 31 +++++++++++++++++ 9 files changed, 107 insertions(+), 39 deletions(-) diff --git a/pkg/sql/BUILD.bazel b/pkg/sql/BUILD.bazel index 7539f1d6ec20..6f06f94d895b 100644 --- a/pkg/sql/BUILD.bazel +++ b/pkg/sql/BUILD.bazel @@ -518,7 +518,6 @@ go_library( "@in_gopkg_yaml_v2//:yaml_v2", "@io_opentelemetry_go_otel//attribute", "@org_golang_x_net//trace", - "@org_golang_x_text//collate", ], ) diff --git a/pkg/sql/information_schema.go b/pkg/sql/information_schema.go index afea4b74a284..66a97e41954e 100644 --- a/pkg/sql/information_schema.go +++ b/pkg/sql/information_schema.go @@ -43,7 +43,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate" "github.com/cockroachdb/errors" "github.com/lib/pq/oid" - "golang.org/x/text/collate" ) const ( @@ -1529,12 +1528,8 @@ https://www.postgresql.org/docs/current/infoschema-collations.html`, tree.NewDString("NO PAD"), ) } - if err := add(collatedstring.DefaultCollationTag); err != nil { - return err - } - for _, tag := range collate.Supported() { - collName := tag.String() - if err := add(collName); err != nil { + for _, tag := range collatedstring.Supported() { + if err := add(tag); err != nil { return err } } @@ -1562,12 +1557,8 @@ https://www.postgresql.org/docs/current/infoschema-collation-character-set-appli tree.NewDString("UTF8"), // character_set_name: UTF8 is the only available encoding ) } - if err := add(collatedstring.DefaultCollationTag); err != nil { - return err - } - for _, tag := range collate.Supported() { - collName := tag.String() - if err := add(collName); err != nil { + for _, tag := range collatedstring.Supported() { + if err := add(tag); err != nil { return err } } diff --git a/pkg/sql/logictest/testdata/logic_test/collatedstring b/pkg/sql/logictest/testdata/logic_test/collatedstring index c2ab4325bc13..f0d983b234d2 100644 --- a/pkg/sql/logictest/testdata/logic_test/collatedstring +++ b/pkg/sql/logictest/testdata/logic_test/collatedstring @@ -436,6 +436,30 @@ no_collation_str default no_collation_str_array default rowid NULL +# "default", "C", and "POSIX" all behave the same as normal strings when +# used in an expression. +query BBB +SELECT 'cat' = ('cat'::text collate "default"), 'cat' = ('cat'::VARCHAR(64) collate "C"), 'cat' = ('cat'::CHAR(3) collate "POSIX") +---- +true true true + +statement ok +INSERT INTO t54989 VALUES ('a', '{b}', 'c', 'd') + +query BB +SELECT default_collation = 'd' COLLATE "C", no_collation_str = 'a' COLLATE "POSIX" FROM t54989 +---- +true true + +# Creating a "C" or "POSIX" column is not allowed. In the future, we may +# add better support for collations, and it would be hard to do that if we +# start allowing columns like these. +statement error invalid locale C: language: tag is not well-formed +CREATE TABLE disallowed(a text COLLATE "C") + +statement error invalid locale POSIX: language: tag is not well-formed +CREATE TABLE disallowed(a text COLLATE "POSIX") + # Regression test for collated string lowercase and hyphen/underscore equality. subtest nocase_strings @@ -484,9 +508,6 @@ SELECT s FROM nocase_strings WHERE s = ('bbb' COLLATE "en-us-u-ks-l""evel2") statement error at or near "evel2": syntax error SELECT s FROM nocase_strings WHERE s = ('bbb' COLLATE "en-us-u-ks-l"evel2") -statement error DEFAULT collations are not supported -SELECT 'default collate'::text collate "default" - statement ok CREATE TABLE nocase_strings2 ( i INT, diff --git a/pkg/sql/logictest/testdata/logic_test/information_schema b/pkg/sql/logictest/testdata/logic_test/information_schema index 82081c97f969..bddb2b923ab8 100644 --- a/pkg/sql/logictest/testdata/logic_test/information_schema +++ b/pkg/sql/logictest/testdata/logic_test/information_schema @@ -4451,6 +4451,8 @@ SELECT * FROM information_schema.collations ---- collation_catalog collation_schema collation_name pad_attribute test pg_catalog default NO PAD +test pg_catalog C NO PAD +test pg_catalog POSIX NO PAD test pg_catalog und NO PAD test pg_catalog aa NO PAD test pg_catalog af NO PAD @@ -4556,6 +4558,8 @@ SELECT * FROM information_schema.collation_character_set_applicability ---- collation_catalog collation_schema collation_name character_set_catalog character_set_schema character_set_name test pg_catalog default NULL NULL UTF8 +test pg_catalog C NULL NULL UTF8 +test pg_catalog POSIX NULL NULL UTF8 test pg_catalog und NULL NULL UTF8 test pg_catalog aa NULL NULL UTF8 test pg_catalog af NULL NULL UTF8 diff --git a/pkg/sql/pg_catalog.go b/pkg/sql/pg_catalog.go index ffdbe8f2df36..0bac9f6b9fb0 100644 --- a/pkg/sql/pg_catalog.go +++ b/pkg/sql/pg_catalog.go @@ -54,7 +54,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" "github.com/lib/pq/oid" - "golang.org/x/text/collate" ) var ( @@ -810,12 +809,8 @@ https://www.postgresql.org/docs/9.5/catalog-pg-collation.html`, tree.DNull, // collisdeterministic ) } - if err := add(collatedstring.DefaultCollationTag); err != nil { - return err - } - for _, tag := range collate.Supported() { - collName := tag.String() - if err := add(collName); err != nil { + for _, tag := range collatedstring.Supported() { + if err := add(tag); err != nil { return err } } diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index 9f92952592ff..77985c3eb9e6 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -1042,7 +1042,7 @@ func ParseDDecimal(s string) (*DDecimal, error) { func (d *DDecimal) SetString(s string) error { // ExactCtx should be able to handle any decimal, but if there is any rounding // or other inexact conversion, it will result in an error. - //_, res, err := HighPrecisionCtx.SetString(&d.Decimal, s) + // _, res, err := HighPrecisionCtx.SetString(&d.Decimal, s) _, res, err := ExactCtx.SetString(&d.Decimal, s) if res != 0 || err != nil { return MakeParseError(s, types.Decimal, err) diff --git a/pkg/sql/sem/tree/type_check.go b/pkg/sql/sem/tree/type_check.go index 5f7f8c18e334..a1a08894dc1f 100644 --- a/pkg/sql/sem/tree/type_check.go +++ b/pkg/sql/sem/tree/type_check.go @@ -699,24 +699,17 @@ func (expr *AnnotateTypeExpr) TypeCheck( func (expr *CollateExpr) TypeCheck( ctx context.Context, semaCtx *SemaContext, desired *types.T, ) (TypedExpr, error) { - if strings.ToLower(expr.Locale) == collatedstring.DefaultCollationTag { - return nil, errors.WithHint( - unimplemented.NewWithIssuef( - 57255, - "DEFAULT collations are not supported", - ), - `omit the 'COLLATE "default"' clause in your statement`, - ) - } - _, err := language.Parse(expr.Locale) - if err != nil { - return nil, pgerror.Wrapf(err, pgcode.InvalidParameterValue, - "invalid locale %s", expr.Locale) - } subExpr, err := expr.Expr.TypeCheck(ctx, semaCtx, types.String) if err != nil { return nil, err } + if collatedstring.IsDefaultEquivalentCollation(expr.Locale) { + return subExpr, nil + } + if _, err := language.Parse(expr.Locale); err != nil { + return nil, pgerror.Wrapf(err, pgcode.InvalidParameterValue, + "invalid locale %s", expr.Locale) + } t := subExpr.ResolvedType() if types.IsStringType(t) { expr.Expr = subExpr diff --git a/pkg/testutils/lint/lint_test.go b/pkg/testutils/lint/lint_test.go index 7900d8632c7b..a830e4403b0a 100644 --- a/pkg/testutils/lint/lint_test.go +++ b/pkg/testutils/lint/lint_test.go @@ -717,6 +717,40 @@ func TestLint(t *testing.T) { } }) + t.Run("TestCollateSupported", func(t *testing.T) { + t.Parallel() + cmd, stderr, filter, err := dirCmd( + pkgDir, + "git", + "grep", + "-nE", + `\bcollate\.Supported\(`, + "--", + "*.go", + ":!util/collatedstring/collatedstring.go", + ":!ccl/changefeedccl/avro_test.go", + ) + if err != nil { + t.Fatal(err) + } + + if err := cmd.Start(); err != nil { + t.Fatal(err) + } + + if err := stream.ForEach(filter, func(s string) { + t.Errorf("\n%s <- forbidden; use 'collatedstring.Supported()' instead", s) + }); err != nil { + t.Error(err) + } + + if err := cmd.Wait(); err != nil { + if out := stderr.String(); len(out) > 0 { + t.Fatalf("err=%s, stderr=%s", err, out) + } + } + }) + t.Run("TestTimeutil", func(t *testing.T) { t.Parallel() cmd, stderr, filter, err := dirCmd( diff --git a/pkg/util/collatedstring/collatedstring.go b/pkg/util/collatedstring/collatedstring.go index 72c58cf05845..f3800b4c45d3 100644 --- a/pkg/util/collatedstring/collatedstring.go +++ b/pkg/util/collatedstring/collatedstring.go @@ -15,9 +15,40 @@ import "golang.org/x/text/collate" // DefaultCollationTag is the "default" collation for strings. const DefaultCollationTag = "default" +// CCollationTag is the "C" collation for strings. At the time of writing +// this, it behaves the same os "default" since LC_COLLATE cannot be modified. +const CCollationTag = "C" + +// PosixCollationTag is the "POSIX" collation for strings. At the time of +// writing this, it behaves the same os "default" since LC_COLLATE cannot be +// modified. +const PosixCollationTag = "POSIX" + +var supportedTagNames []string + +// IsDefaultEquivalentCollation returns true if the collation is "default", +// "C", or "POSIX". +func IsDefaultEquivalentCollation(s string) bool { + return s == DefaultCollationTag || s == CCollationTag || s == PosixCollationTag +} + +// Supported returns a list of all the collation names that are supported. +func Supported() []string { + return supportedTagNames +} + func init() { if collate.CLDRVersion != "23" { panic("This binary was built with an incompatible version of golang.org/x/text. " + "See https://github.com/cockroachdb/cockroach/issues/63738 for details") } + + supportedTagNames = []string{ + DefaultCollationTag, + CCollationTag, + PosixCollationTag, + } + for _, t := range collate.Supported() { + supportedTagNames = append(supportedTagNames, t.String()) + } }