From 2363d4403aaa43d39dd8b17373851135006adb11 Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Wed, 22 Aug 2018 11:50:35 +0200 Subject: [PATCH] cli,sql: add another crutch to the handling of column types in `dump` There are major problems in `cockroach dump` described separately in https://github.com/cockroachdb/cockroach/issues/28948. Part of this is `cockroach dump` trying to reverse-engineering a datum type from the announced column type in `information_schema.columns`, by parsing the type name. Prior to this patch, this parsing was incomplete (it went out of sync with the SQL parser over time, naturally as it was bound to do). This entire approach is flawed, but this is no time to redesign `cockroach dump`. Instead, this patch re-syncs the dump-specific type parser with the general SQL parser. Release note (bug fix): `cockroach dump` is now again better able to operate across multiple CockroachDB versions. --- pkg/cli/dump.go | 57 ++++++++++++++++++++------------- pkg/cli/dump_test.go | 12 +++++-- pkg/sql/sem/tree/parse_array.go | 40 ----------------------- 3 files changed, 44 insertions(+), 65 deletions(-) diff --git a/pkg/cli/dump.go b/pkg/cli/dump.go index 1d81002a19ae..11d7d90c13ab 100644 --- a/pkg/cli/dump.go +++ b/pkg/cli/dump.go @@ -29,6 +29,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/coltypes" "github.com/cockroachdb/cockroach/pkg/sql/lex" + "github.com/cockroachdb/cockroach/pkg/sql/parser" "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" "github.com/cockroachdb/cockroach/pkg/util/timeofday" @@ -46,6 +47,10 @@ is omitted, dump all tables in the database. RunE: MaybeDecorateGRPCError(runDump), } +// runDumps performs a dump of a table or database. +// +// The approach here and its current flaws are summarized +// in https://github.com/cockroachdb/cockroach/issues/28948. func runDump(cmd *cobra.Command, args []string) error { conn, err := getPasswordAndMakeSQLClient("cockroach dump") if err != nil { @@ -187,7 +192,7 @@ type tableMetadata struct { basicMetadata columnNames string - columnTypes map[string]string + columnTypes map[string]coltypes.T } // getDumpMetadata retrieves the table information for the specified table(s). @@ -370,6 +375,8 @@ func extractArray(val interface{}) ([]string, error) { func getMetadataForTable(conn *sqlConn, md basicMetadata, ts string) (tableMetadata, error) { // Fetch column types. + // + // TODO(knz): this approach is flawed, see #28948. makeQuery := func(colname string) string { // This query is parameterized by the column name because of @@ -414,7 +421,7 @@ func getMetadataForTable(conn *sqlConn, md basicMetadata, ts string) (tableMetad } } vals := make([]driver.Value, 2) - coltypes := make(map[string]string) + coltypes := make(map[string]coltypes.T) colnames := tree.NewFmtCtxWithBuf(tree.FmtSimple) defer colnames.Close() for { @@ -432,7 +439,16 @@ func getMetadataForTable(conn *sqlConn, md basicMetadata, ts string) (tableMetad if !ok { return tableMetadata{}, fmt.Errorf("unexpected value: %T", typI) } - coltypes[name] = typ + + // Transform the type name to an internal coltype. + sql := fmt.Sprintf("ALTER TABLE woo ALTER COLUMN woo SET DATA TYPE %s", typ) + stmt, err := parser.ParseOne(sql) + if err != nil { + return tableMetadata{}, fmt.Errorf("type %s is not a valid CockroachDB type", typ) + } + coltyp := stmt.(*tree.AlterTable).Cmds[0].(*tree.AlterTableAlterColumnType).ToType + + coltypes[name] = coltyp if colnames.Len() > 0 { colnames.WriteString(", ") } @@ -576,6 +592,7 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada f.WriteString(", ") } var d tree.Datum + // TODO(knz): this approach is brittle+flawed, see #28948. switch t := sv.(type) { case nil: d = tree.DNull @@ -588,25 +605,26 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada case string: d = tree.NewDString(t) case []byte: + // TODO(knz): this approach is brittle+flawed, see #28948. switch ct := md.columnTypes[cols[si]]; ct { - case "INTERVAL": + case coltypes.Interval: d, err = tree.ParseDInterval(string(t)) if err != nil { return err } - case "BYTES": + case coltypes.Bytes: d = tree.NewDBytes(tree.DBytes(t)) - case "UUID": + case coltypes.UUID: d, err = tree.ParseDUuidFromString(string(t)) if err != nil { return err } - case "INET": + case coltypes.INet: d, err = tree.ParseDIPAddrFromINetString(string(t)) if err != nil { return err } - case "JSON": + case coltypes.JSON, coltypes.JSONB: d, err = tree.ParseDJSON(string(t)) if err != nil { return err @@ -615,20 +633,16 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada // STRING and DECIMAL types can have optional length // suffixes, so only examine the prefix of the type. // In addition, we can only observe ARRAY types by their [] suffix. - if strings.HasSuffix(md.columnTypes[cols[si]], "[]") { - typ := strings.TrimRight(md.columnTypes[cols[si]], "[]") - elemType, err := tree.ArrayElementTypeStringToColType(typ) - if err != nil { - return err - } + if arrayType, ok := ct.(*coltypes.TArray); ok { + elemType := arrayType.ParamType d, err = tree.ParseDArrayFromString( tree.NewTestingEvalContext(serverCfg.Settings), string(t), elemType) if err != nil { return err } - } else if strings.HasPrefix(md.columnTypes[cols[si]], "STRING") { + } else if _, ok := ct.(*coltypes.TString); ok { d = tree.NewDString(string(t)) - } else if strings.HasPrefix(md.columnTypes[cols[si]], "DECIMAL") { + } else if _, ok := ct.(*coltypes.TDecimal); ok { d, err = tree.ParseDDecimal(string(t)) if err != nil { return err @@ -638,16 +652,15 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada } } case time.Time: - ct := md.columnTypes[cols[si]] - switch ct { - case "DATE": + switch ct := md.columnTypes[cols[si]]; ct { + case coltypes.Date: d = tree.NewDDateFromTime(t, time.UTC) - case "TIME": + case coltypes.Time: // pq awkwardly represents TIME as a time.Time with date 0000-01-01. d = tree.MakeDTime(timeofday.FromTime(t)) - case "TIMESTAMP": + case coltypes.Timestamp: d = tree.MakeDTimestamp(t, time.Nanosecond) - case "TIMESTAMP WITH TIME ZONE": + case coltypes.TimestampWithTZ: d = tree.MakeDTimestampTZ(t, time.Nanosecond) default: return errors.Errorf("unknown timestamp type: %s, %v: %s", t, cols[si], md.columnTypes[cols[si]]) diff --git a/pkg/cli/dump_test.go b/pkg/cli/dump_test.go index 84000d581ce6..394925702ddb 100644 --- a/pkg/cli/dump_test.go +++ b/pkg/cli/dump_test.go @@ -198,7 +198,10 @@ func TestDumpRandom(t *testing.T) { CREATE TABLE d.t ( rowid int, i int, + si smallint, + bi bigint, f float, + fr real, d date, m timestamp, n interval, @@ -209,7 +212,7 @@ func TestDumpRandom(t *testing.T) { u uuid, ip inet, j json, - PRIMARY KEY (rowid, i, f, d, m, n, o, e, s, b, u, ip) + PRIMARY KEY (rowid, i, si, bi, f, fr, d, m, n, o, e, s, b, u, ip) ); SET extra_float_digits = 3; `, nil); err != nil { @@ -278,7 +281,10 @@ func TestDumpRandom(t *testing.T) { vals := []driver.Value{ _i, i, + i & 0x7fff, // si + i, // bi f, + f, // fr d, m, []byte(n), // intervals come out as `[]byte`s @@ -290,14 +296,14 @@ func TestDumpRandom(t *testing.T) { []byte(ip.String()), []byte(j.String()), } - if err := conn.Exec("INSERT INTO d.t VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)", vals); err != nil { + if err := conn.Exec("INSERT INTO d.t VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)", vals); err != nil { t.Fatal(err) } generatedRows = append(generatedRows, vals[1:]) } check := func(table string) { - q := fmt.Sprintf("SELECT i, f, d, m, n, o, e, s, b, u, ip, j FROM %s ORDER BY rowid", table) + q := fmt.Sprintf("SELECT i, si, bi, f, fr, d, m, n, o, e, s, b, u, ip, j FROM %s ORDER BY rowid", table) nrows, err := conn.Query(q, nil) if err != nil { t.Fatal(err) diff --git a/pkg/sql/sem/tree/parse_array.go b/pkg/sql/sem/tree/parse_array.go index 9b168206b475..b705d4e51a65 100644 --- a/pkg/sql/sem/tree/parse_array.go +++ b/pkg/sql/sem/tree/parse_array.go @@ -145,46 +145,6 @@ func (p *parseState) parseElement() error { return p.result.Append(d) } -// ArrayElementTypeStringToColType returns a column type given a -// string representation of the type. Used by dump. It only -// supports those type names that can appear immediately before `[]`. -func ArrayElementTypeStringToColType(s string) (coltypes.T, error) { - switch s { - case "BOOL": - return coltypes.Bool, nil - case "INT": - return coltypes.Int, nil - case "FLOAT", "FLOAT8", "DOUBLE PRECISION": - return coltypes.Float8, nil - case "REAL", "FLOAT4": - return coltypes.Float4, nil - case "DECIMAL": - return coltypes.Decimal, nil - case "TIMESTAMP": - return coltypes.Timestamp, nil - case "TIMESTAMPTZ", "TIMESTAMP WITH TIME ZONE": - return coltypes.TimestampWithTZ, nil - case "INTERVAL": - return coltypes.Interval, nil - case "UUID": - return coltypes.UUID, nil - case "INET": - return coltypes.INet, nil - case "DATE": - return coltypes.Date, nil - case "TIME": - return coltypes.Time, nil - case "STRING": - return coltypes.String, nil - case "NAME": - return coltypes.Name, nil - case "BYTES": - return coltypes.Bytes, nil - default: - return nil, pgerror.NewErrorf(pgerror.CodeInternalError, "unexpected column type %s", s) - } -} - // ParseDArrayFromString parses the string-form of constructing arrays, handling // cases such as `'{1,2,3}'::INT[]`. func ParseDArrayFromString(evalCtx *EvalContext, s string, t coltypes.T) (*DArray, error) {