Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-22.2: collatedstring: support C and POSIX in expressions #97415

Merged
merged 2 commits into from
Feb 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -1915,6 +1915,7 @@ GO_TARGETS = [
"//pkg/util/circuit:circuit_test",
"//pkg/util/cloudinfo:cloudinfo",
"//pkg/util/cloudinfo:cloudinfo_test",
"//pkg/util/collatedstring:collatedstring",
"//pkg/util/contextutil:contextutil",
"//pkg/util/contextutil:contextutil_test",
"//pkg/util/ctxgroup:ctxgroup",
Expand Down Expand Up @@ -2931,6 +2932,7 @@ GET_X_DATA_TARGETS = [
"//pkg/util/cgroups:get_x_data",
"//pkg/util/circuit:get_x_data",
"//pkg/util/cloudinfo:get_x_data",
"//pkg/util/collatedstring:get_x_data",
"//pkg/util/contextutil:get_x_data",
"//pkg/util/ctxgroup:get_x_data",
"//pkg/util/duration:get_x_data",
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ go_library(
"//pkg/util/buildutil",
"//pkg/util/cache",
"//pkg/util/cancelchecker",
"//pkg/util/collatedstring",
"//pkg/util/contextutil",
"//pkg/util/ctxgroup",
"//pkg/util/duration",
Expand Down Expand Up @@ -517,7 +518,6 @@ go_library(
"@in_gopkg_yaml_v2//:yaml_v2",
"@io_opentelemetry_go_otel//attribute",
"@org_golang_x_net//trace",
"@org_golang_x_text//collate",
],
)

Expand Down
18 changes: 5 additions & 13 deletions pkg/sql/information_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlutil"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/sql/vtable"
"github.com/cockroachdb/cockroach/pkg/util/collatedstring"
"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
"golang.org/x/text/collate"
)

const (
Expand Down Expand Up @@ -1528,12 +1528,8 @@ https://www.postgresql.org/docs/current/infoschema-collations.html`,
tree.NewDString("NO PAD"),
)
}
if err := add(tree.DefaultCollationTag); err != nil {
return err
}
for _, tag := range collate.Supported() {
collName := tag.String()
if err := add(collName); err != nil {
for _, tag := range collatedstring.Supported() {
if err := add(tag); err != nil {
return err
}
}
Expand Down Expand Up @@ -1561,12 +1557,8 @@ https://www.postgresql.org/docs/current/infoschema-collation-character-set-appli
tree.NewDString("UTF8"), // character_set_name: UTF8 is the only available encoding
)
}
if err := add(tree.DefaultCollationTag); err != nil {
return err
}
for _, tag := range collate.Supported() {
collName := tag.String()
if err := add(collName); err != nil {
for _, tag := range collatedstring.Supported() {
if err := add(tag); err != nil {
return err
}
}
Expand Down
27 changes: 24 additions & 3 deletions pkg/sql/logictest/testdata/logic_test/collatedstring
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,30 @@ no_collation_str default
no_collation_str_array default
rowid NULL

# "default", "C", and "POSIX" all behave the same as normal strings when
# used in an expression.
query BBB
SELECT 'cat' = ('cat'::text collate "default"), 'cat' = ('cat'::VARCHAR(64) collate "C"), 'cat' = ('cat'::CHAR(3) collate "POSIX")
----
true true true

statement ok
INSERT INTO t54989 VALUES ('a', '{b}', 'c', 'd')

query BB
SELECT default_collation = 'd' COLLATE "C", no_collation_str = 'a' COLLATE "POSIX" FROM t54989
----
true true

# Creating a "C" or "POSIX" column is not allowed. In the future, we may
# add better support for collations, and it would be hard to do that if we
# start allowing columns like these.
statement error invalid locale C: language: tag is not well-formed
CREATE TABLE disallowed(a text COLLATE "C")

statement error invalid locale POSIX: language: tag is not well-formed
CREATE TABLE disallowed(a text COLLATE "POSIX")

# Regression test for collated string lowercase and hyphen/underscore equality.
subtest nocase_strings

Expand Down Expand Up @@ -484,9 +508,6 @@ SELECT s FROM nocase_strings WHERE s = ('bbb' COLLATE "en-us-u-ks-l""evel2")
statement error at or near "evel2": syntax error
SELECT s FROM nocase_strings WHERE s = ('bbb' COLLATE "en-us-u-ks-l"evel2")

statement error DEFAULT collations are not supported
SELECT 'default collate'::text collate "default"

statement ok
CREATE TABLE nocase_strings2 (
i INT,
Expand Down
4 changes: 4 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/information_schema
Original file line number Diff line number Diff line change
Expand Up @@ -4451,6 +4451,8 @@ SELECT * FROM information_schema.collations
----
collation_catalog collation_schema collation_name pad_attribute
test pg_catalog default NO PAD
test pg_catalog C NO PAD
test pg_catalog POSIX NO PAD
test pg_catalog und NO PAD
test pg_catalog aa NO PAD
test pg_catalog af NO PAD
Expand Down Expand Up @@ -4556,6 +4558,8 @@ SELECT * FROM information_schema.collation_character_set_applicability
----
collation_catalog collation_schema collation_name character_set_catalog character_set_schema character_set_name
test pg_catalog default NULL NULL UTF8
test pg_catalog C NULL NULL UTF8
test pg_catalog POSIX NULL NULL UTF8
test pg_catalog und NULL NULL UTF8
test pg_catalog aa NULL NULL UTF8
test pg_catalog af NULL NULL UTF8
Expand Down
14 changes: 5 additions & 9 deletions pkg/sql/pg_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/sql/vtable"
"github.com/cockroachdb/cockroach/pkg/util/collatedstring"
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
"golang.org/x/text/collate"
)

var (
Expand Down Expand Up @@ -809,12 +809,8 @@ https://www.postgresql.org/docs/9.5/catalog-pg-collation.html`,
tree.DNull, // collisdeterministic
)
}
if err := add(tree.DefaultCollationTag); err != nil {
return err
}
for _, tag := range collate.Supported() {
collName := tag.String()
if err := add(collName); err != nil {
for _, tag := range collatedstring.Supported() {
if err := add(tag); err != nil {
return err
}
}
Expand Down Expand Up @@ -4271,13 +4267,13 @@ func typColl(typ *types.T, h oidHasher) tree.Datum {
case types.AnyFamily:
return oidZero
case types.StringFamily:
return h.CollationOid(tree.DefaultCollationTag)
return h.CollationOid(collatedstring.DefaultCollationTag)
case types.CollatedStringFamily:
return h.CollationOid(typ.Locale())
}

if typ.Equivalent(types.StringArray) {
return h.CollationOid(tree.DefaultCollationTag)
return h.CollationOid(collatedstring.DefaultCollationTag)
}
return oidZero
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sem/tree/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ go_library(
"backup.go",
"changefeed.go",
"col_name.go",
"collatedstring.go",
"comment_on_column.go",
"comment_on_constraint.go",
"comment_on_database.go",
Expand Down Expand Up @@ -140,6 +139,7 @@ go_library(
"//pkg/util",
"//pkg/util/bitarray",
"//pkg/util/cache",
"//pkg/util/collatedstring",
"//pkg/util/duration",
"//pkg/util/encoding",
"//pkg/util/errorutil/unimplemented",
Expand Down
23 changes: 0 additions & 23 deletions pkg/sql/sem/tree/collatedstring.go

This file was deleted.

3 changes: 2 additions & 1 deletion pkg/sql/sem/tree/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/collatedstring"
"github.com/cockroachdb/cockroach/pkg/util/pretty"
"github.com/cockroachdb/errors"
"golang.org/x/text/language"
Expand Down Expand Up @@ -544,7 +545,7 @@ func NewColumnTableDef(
// In CRDB, collated strings are treated separately to string family types.
// To most behave like postgres, set the CollatedString type if a non-"default"
// collation is used.
if locale != DefaultCollationTag {
if locale != collatedstring.DefaultCollationTag {
_, err := language.Parse(locale)
if err != nil {
return nil, pgerror.Wrapf(err, pgcode.Syntax, "invalid locale %s", locale)
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -1042,7 +1042,7 @@ func ParseDDecimal(s string) (*DDecimal, error) {
func (d *DDecimal) SetString(s string) error {
// ExactCtx should be able to handle any decimal, but if there is any rounding
// or other inexact conversion, it will result in an error.
//_, res, err := HighPrecisionCtx.SetString(&d.Decimal, s)
// _, res, err := HighPrecisionCtx.SetString(&d.Decimal, s)
_, res, err := ExactCtx.SetString(&d.Decimal, s)
if res != 0 || err != nil {
return MakeParseError(s, types.Decimal, err)
Expand Down
22 changes: 8 additions & 14 deletions pkg/sql/sem/tree/type_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/volatility"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/collatedstring"
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/errorutil/unimplemented"
"github.com/cockroachdb/cockroach/pkg/util/timeutil/pgdate"
Expand Down Expand Up @@ -698,24 +699,17 @@ func (expr *AnnotateTypeExpr) TypeCheck(
func (expr *CollateExpr) TypeCheck(
ctx context.Context, semaCtx *SemaContext, desired *types.T,
) (TypedExpr, error) {
if strings.ToLower(expr.Locale) == DefaultCollationTag {
return nil, errors.WithHint(
unimplemented.NewWithIssuef(
57255,
"DEFAULT collations are not supported",
),
`omit the 'COLLATE "default"' clause in your statement`,
)
}
_, err := language.Parse(expr.Locale)
if err != nil {
return nil, pgerror.Wrapf(err, pgcode.InvalidParameterValue,
"invalid locale %s", expr.Locale)
}
subExpr, err := expr.Expr.TypeCheck(ctx, semaCtx, types.String)
if err != nil {
return nil, err
}
if collatedstring.IsDefaultEquivalentCollation(expr.Locale) {
return subExpr, nil
}
if _, err := language.Parse(expr.Locale); err != nil {
return nil, pgerror.Wrapf(err, pgcode.InvalidParameterValue,
"invalid locale %s", expr.Locale)
}
t := subExpr.ResolvedType()
if types.IsStringType(t) {
expr.Expr = subExpr
Expand Down
34 changes: 34 additions & 0 deletions pkg/testutils/lint/lint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,40 @@ func TestLint(t *testing.T) {
}
})

t.Run("TestCollateSupported", func(t *testing.T) {
t.Parallel()
cmd, stderr, filter, err := dirCmd(
pkgDir,
"git",
"grep",
"-nE",
`\bcollate\.Supported\(`,
"--",
"*.go",
":!util/collatedstring/collatedstring.go",
":!ccl/changefeedccl/avro_test.go",
)
if err != nil {
t.Fatal(err)
}

if err := cmd.Start(); err != nil {
t.Fatal(err)
}

if err := stream.ForEach(filter, func(s string) {
t.Errorf("\n%s <- forbidden; use 'collatedstring.Supported()' instead", s)
}); err != nil {
t.Error(err)
}

if err := cmd.Wait(); err != nil {
if out := stderr.String(); len(out) > 0 {
t.Fatalf("err=%s, stderr=%s", err, out)
}
}
})

t.Run("TestTimeutil", func(t *testing.T) {
t.Parallel()
cmd, stderr, filter, err := dirCmd(
Expand Down
12 changes: 12 additions & 0 deletions pkg/util/collatedstring/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
name = "collatedstring",
srcs = ["collatedstring.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/util/collatedstring",
visibility = ["//visibility:public"],
deps = ["@org_golang_x_text//collate"],
)

get_x_data(name = "get_x_data")
54 changes: 54 additions & 0 deletions pkg/util/collatedstring/collatedstring.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package collatedstring

import "golang.org/x/text/collate"

// DefaultCollationTag is the "default" collation for strings.
const DefaultCollationTag = "default"

// CCollationTag is the "C" collation for strings. At the time of writing
// this, it behaves the same os "default" since LC_COLLATE cannot be modified.
const CCollationTag = "C"

// PosixCollationTag is the "POSIX" collation for strings. At the time of
// writing this, it behaves the same os "default" since LC_COLLATE cannot be
// modified.
const PosixCollationTag = "POSIX"

var supportedTagNames []string

// IsDefaultEquivalentCollation returns true if the collation is "default",
// "C", or "POSIX".
func IsDefaultEquivalentCollation(s string) bool {
return s == DefaultCollationTag || s == CCollationTag || s == PosixCollationTag
}

// Supported returns a list of all the collation names that are supported.
func Supported() []string {
return supportedTagNames
}

func init() {
if collate.CLDRVersion != "23" {
panic("This binary was built with an incompatible version of golang.org/x/text. " +
"See https://github.com/cockroachdb/cockroach/issues/63738 for details")
}

supportedTagNames = []string{
DefaultCollationTag,
CCollationTag,
PosixCollationTag,
}
for _, t := range collate.Supported() {
supportedTagNames = append(supportedTagNames, t.String())
}
}