Skip to content

Commit

Permalink
collatedstring: support default, C, and POSIX in expressions
Browse files Browse the repository at this point in the history
Release note (sql change): Expressions of the form `COLLATE "default"`,
`COLLATE "C"`, and `COLLATE "POSIX"` are now supported. Since the
default collation cannot be changed currently, these expressions are all
equivalent. The expressions are evaluated by treating the input as a
normal string, and ignoring the collation.

This means that comparisons between strings and collated strings that
use "default", "C", or "POSIX" are now supported.

Creating a column with the "C" or "POSIX" collations is still not
supported.
  • Loading branch information
rafiss committed Feb 21, 2023
1 parent 3e0df2f commit 8c1e1be
Show file tree
Hide file tree
Showing 9 changed files with 107 additions and 39 deletions.
1 change: 0 additions & 1 deletion pkg/sql/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,6 @@ go_library(
"@in_gopkg_yaml_v2//:yaml_v2",
"@io_opentelemetry_go_otel//attribute",
"@org_golang_x_net//trace",
"@org_golang_x_text//collate",
],
)

Expand Down
17 changes: 4 additions & 13 deletions pkg/sql/information_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
"golang.org/x/text/collate"
)

const (
Expand Down Expand Up @@ -1529,12 +1528,8 @@ https://www.postgresql.org/docs/current/infoschema-collations.html`,
tree.NewDString("NO PAD"),
)
}
if err := add(collatedstring.DefaultCollationTag); err != nil {
return err
}
for _, tag := range collate.Supported() {
collName := tag.String()
if err := add(collName); err != nil {
for _, tag := range collatedstring.Supported() {
if err := add(tag); err != nil {
return err
}
}
Expand Down Expand Up @@ -1562,12 +1557,8 @@ https://www.postgresql.org/docs/current/infoschema-collation-character-set-appli
tree.NewDString("UTF8"), // character_set_name: UTF8 is the only available encoding
)
}
if err := add(collatedstring.DefaultCollationTag); err != nil {
return err
}
for _, tag := range collate.Supported() {
collName := tag.String()
if err := add(collName); err != nil {
for _, tag := range collatedstring.Supported() {
if err := add(tag); err != nil {
return err
}
}
Expand Down
27 changes: 24 additions & 3 deletions pkg/sql/logictest/testdata/logic_test/collatedstring
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,30 @@ no_collation_str default
no_collation_str_array default
rowid NULL

# "default", "C", and "POSIX" all behave the same as normal strings when
# used in an expression.
query BBB
SELECT 'cat' = ('cat'::text collate "default"), 'cat' = ('cat'::VARCHAR(64) collate "C"), 'cat' = ('cat'::CHAR(3) collate "POSIX")
----
true true true

statement ok
INSERT INTO t54989 VALUES ('a', '{b}', 'c', 'd')

query BB
SELECT default_collation = 'd' COLLATE "C", no_collation_str = 'a' COLLATE "POSIX" FROM t54989
----
true true

# Creating a "C" or "POSIX" column is not allowed. In the future, we may
# add better support for collations, and it would be hard to do that if we
# start allowing columns like these.
statement error invalid locale C: language: tag is not well-formed
CREATE TABLE disallowed(a text COLLATE "C")

statement error invalid locale POSIX: language: tag is not well-formed
CREATE TABLE disallowed(a text COLLATE "POSIX")

# Regression test for collated string lowercase and hyphen/underscore equality.
subtest nocase_strings

Expand Down Expand Up @@ -484,9 +508,6 @@ SELECT s FROM nocase_strings WHERE s = ('bbb' COLLATE "en-us-u-ks-l""evel2")
statement error at or near "evel2": syntax error
SELECT s FROM nocase_strings WHERE s = ('bbb' COLLATE "en-us-u-ks-l"evel2")

statement error DEFAULT collations are not supported
SELECT 'default collate'::text collate "default"

statement ok
CREATE TABLE nocase_strings2 (
i INT,
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -4451,6 +4451,8 @@ SELECT * FROM information_schema.collations
----
collation_catalog collation_schema collation_name pad_attribute
test pg_catalog default NO PAD
test pg_catalog C NO PAD
test pg_catalog POSIX NO PAD
test pg_catalog und NO PAD
test pg_catalog aa NO PAD
test pg_catalog af NO PAD
Expand Down Expand Up @@ -4556,6 +4558,8 @@ SELECT * FROM information_schema.collation_character_set_applicability
----
collation_catalog collation_schema collation_name character_set_catalog character_set_schema character_set_name
test pg_catalog default NULL NULL UTF8
test pg_catalog C NULL NULL UTF8
test pg_catalog POSIX NULL NULL UTF8
test pg_catalog und NULL NULL UTF8
test pg_catalog aa NULL NULL UTF8
test pg_catalog af NULL NULL UTF8
Expand Down
9 changes: 2 additions & 7 deletions pkg/sql/pg_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
"golang.org/x/text/collate"
)

var (
Expand Down Expand Up @@ -810,12 +809,8 @@ https://www.postgresql.org/docs/9.5/catalog-pg-collation.html`,
tree.DNull, // collisdeterministic
)
}
if err := add(collatedstring.DefaultCollationTag); err != nil {
return err
}
for _, tag := range collate.Supported() {
collName := tag.String()
if err := add(collName); err != nil {
for _, tag := range collatedstring.Supported() {
if err := add(tag); err != nil {
return err
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -1042,7 +1042,7 @@ func ParseDDecimal(s string) (*DDecimal, error) {
func (d *DDecimal) SetString(s string) error {
// ExactCtx should be able to handle any decimal, but if there is any rounding
// or other inexact conversion, it will result in an error.
//_, res, err := HighPrecisionCtx.SetString(&d.Decimal, s)
// _, res, err := HighPrecisionCtx.SetString(&d.Decimal, s)
_, res, err := ExactCtx.SetString(&d.Decimal, s)
if res != 0 || err != nil {
return MakeParseError(s, types.Decimal, err)
Expand Down
21 changes: 7 additions & 14 deletions pkg/sql/sem/tree/type_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -699,24 +699,17 @@ func (expr *AnnotateTypeExpr) TypeCheck(
func (expr *CollateExpr) TypeCheck(
ctx context.Context, semaCtx *SemaContext, desired *types.T,
) (TypedExpr, error) {
if strings.ToLower(expr.Locale) == collatedstring.DefaultCollationTag {
return nil, errors.WithHint(
unimplemented.NewWithIssuef(
57255,
"DEFAULT collations are not supported",
),
`omit the 'COLLATE "default"' clause in your statement`,
)
}
_, err := language.Parse(expr.Locale)
if err != nil {
return nil, pgerror.Wrapf(err, pgcode.InvalidParameterValue,
"invalid locale %s", expr.Locale)
}
subExpr, err := expr.Expr.TypeCheck(ctx, semaCtx, types.String)
if err != nil {
return nil, err
}
if collatedstring.IsDefaultEquivalentCollation(expr.Locale) {
return subExpr, nil
}
if _, err := language.Parse(expr.Locale); err != nil {
return nil, pgerror.Wrapf(err, pgcode.InvalidParameterValue,
"invalid locale %s", expr.Locale)
}
t := subExpr.ResolvedType()
if types.IsStringType(t) {
expr.Expr = subExpr
Expand Down
34 changes: 34 additions & 0 deletions pkg/testutils/lint/lint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,40 @@ func TestLint(t *testing.T) {
}
})

t.Run("TestCollateSupported", func(t *testing.T) {
t.Parallel()
cmd, stderr, filter, err := dirCmd(
pkgDir,
"git",
"grep",
"-nE",
`\bcollate\.Supported\(`,
"--",
"*.go",
":!util/collatedstring/collatedstring.go",
":!ccl/changefeedccl/avro_test.go",
)
if err != nil {
t.Fatal(err)
}

if err := cmd.Start(); err != nil {
t.Fatal(err)
}

if err := stream.ForEach(filter, func(s string) {
t.Errorf("\n%s <- forbidden; use 'collatedstring.Supported()' instead", s)
}); err != nil {
t.Error(err)
}

if err := cmd.Wait(); err != nil {
if out := stderr.String(); len(out) > 0 {
t.Fatalf("err=%s, stderr=%s", err, out)
}
}
})

t.Run("TestTimeutil", func(t *testing.T) {
t.Parallel()
cmd, stderr, filter, err := dirCmd(
Expand Down
31 changes: 31 additions & 0 deletions pkg/util/collatedstring/collatedstring.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,40 @@ import "golang.org/x/text/collate"
// DefaultCollationTag is the "default" collation for strings.
const DefaultCollationTag = "default"

// CCollationTag is the "C" collation for strings. At the time of writing
// this, it behaves the same os "default" since LC_COLLATE cannot be modified.
const CCollationTag = "C"

// PosixCollationTag is the "POSIX" collation for strings. At the time of
// writing this, it behaves the same os "default" since LC_COLLATE cannot be
// modified.
const PosixCollationTag = "POSIX"

var supportedTagNames []string

// IsDefaultEquivalentCollation returns true if the collation is "default",
// "C", or "POSIX".
func IsDefaultEquivalentCollation(s string) bool {
return s == DefaultCollationTag || s == CCollationTag || s == PosixCollationTag
}

// Supported returns a list of all the collation names that are supported.
func Supported() []string {
return supportedTagNames
}

func init() {
if collate.CLDRVersion != "23" {
panic("This binary was built with an incompatible version of golang.org/x/text. " +
"See https://github.com/cockroachdb/cockroach/issues/63738 for details")
}

supportedTagNames = []string{
DefaultCollationTag,
CCollationTag,
PosixCollationTag,
}
for _, t := range collate.Supported() {
supportedTagNames = append(supportedTagNames, t.String())
}
}

0 comments on commit 8c1e1be

Please sign in to comment.