cli,sql: add another crutch to the handling of column types in dump
There are major problems in `cockroach dump` described separately in
cockroachdb#28948.

Part of the problem is that `cockroach dump` tries to reverse-engineer a
datum type from the announced column type in `information_schema.columns`
by parsing the type name. Prior to this patch, this parsing was
incomplete (it had drifted out of sync with the SQL parser over time, as
it was bound to do).

This entire approach is flawed, but this is no time to redesign
`cockroach dump`. Instead, this patch re-syncs the dump-specific type
parser with the general SQL parser. It also ensures this parser is
still able to parse pre-2.1 type name aliases.

Release note (bug fix): `cockroach dump` is now better able to operate
across multiple CockroachDB versions.
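
For illustration only (an editor's sketch, not part of this commit): the
reverse-engineering step described above boils down to reading a type name
from `information_schema.columns` and mapping it back onto a datum parser.
The snippet below shows that pattern under stated assumptions: a hypothetical
table `d.t` with a column `v`, a local insecure cluster, and the standard
`database/sql`/`lib/pq` driver instead of the internal `sqlConn` that dump
actually uses. The real logic lives in `getMetadataForTable` and
`dumpTableData` in `pkg/cli/dump.go` below.

package main

import (
	"database/sql"
	"fmt"
	"log"
	"strings"

	_ "github.com/lib/pq"
)

func main() {
	// Assumption: a CockroachDB node on the default port, an insecure root
	// user, and a database "d" containing table "t" with column "v".
	db, err := sql.Open("postgres",
		"postgresql://root@localhost:26257/d?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// information_schema.columns announces a type name, e.g. "DECIMAL" on
	// one version and "NUMERIC" on another, or "STRING[]" for an array.
	var declared string
	if err := db.QueryRow(
		`SELECT data_type FROM information_schema.columns
		  WHERE table_name = 't' AND column_name = 'v'`).Scan(&declared); err != nil {
		log.Fatal(err)
	}

	// dump must map that name back onto a datum parser before it can decode
	// the textual values it receives; arrays are only recognizable by their
	// "[]" suffix, which is why the dump-side element-type parser exists.
	if strings.HasSuffix(declared, "[]") {
		fmt.Println("array of", strings.TrimSuffix(declared, "[]"))
	} else {
		fmt.Println("scalar type", declared)
	}
}
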
knz committed Aug 22, 2018
1 parent 1ee00e9 commit e4f172f
Showing 3 changed files with 83 additions and 48 deletions.
79 changes: 74 additions & 5 deletions pkg/cli/dump.go
@@ -29,6 +29,7 @@ import (

"github.com/cockroachdb/cockroach/pkg/sql/coltypes"
"github.com/cockroachdb/cockroach/pkg/sql/lex"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/timeofday"
@@ -46,6 +47,10 @@ is omitted, dump all tables in the database.
RunE: MaybeDecorateGRPCError(runDump),
}

// runDump performs a dump of a table or database.
//
// The approach here and its current flaws are summarized
// in https://github.com/cockroachdb/cockroach/issues/28948.
func runDump(cmd *cobra.Command, args []string) error {
conn, err := getPasswordAndMakeSQLClient("cockroach dump")
if err != nil {
@@ -370,6 +375,8 @@ func extractArray(val interface{}) ([]string, error) {

func getMetadataForTable(conn *sqlConn, md basicMetadata, ts string) (tableMetadata, error) {
// Fetch column types.
//
// TODO(knz): this approach is flawed, see #28948.

makeQuery := func(colname string) string {
// This query is parameterized by the column name because of
@@ -576,6 +583,7 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada
f.WriteString(", ")
}
var d tree.Datum
// TODO(knz): this approach is brittle+flawed, see #28948.
switch t := sv.(type) {
case nil:
d = tree.DNull
@@ -588,13 +596,14 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada
case string:
d = tree.NewDString(t)
case []byte:
// TODO(knz): this approach is brittle+flawed, see #28948.
switch ct := md.columnTypes[cols[si]]; ct {
case "INTERVAL":
d, err = tree.ParseDInterval(string(t))
if err != nil {
return err
}
case "BYTES":
case "BYTES", "BLOB", "BYTEA":
d = tree.NewDBytes(tree.DBytes(t))
case "UUID":
d, err = tree.ParseDUuidFromString(string(t))
@@ -606,7 +615,7 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada
if err != nil {
return err
}
case "JSON":
case "JSON", "JSONB":
d, err = tree.ParseDJSON(string(t))
if err != nil {
return err
@@ -617,7 +626,7 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada
// In addition, we can only observe ARRAY types by their [] suffix.
if strings.HasSuffix(md.columnTypes[cols[si]], "[]") {
typ := strings.TrimRight(md.columnTypes[cols[si]], "[]")
elemType, err := tree.ArrayElementTypeStringToColType(typ)
elemType, err := parseArrayElementTypeString(typ)
if err != nil {
return err
}
@@ -626,9 +635,14 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada
if err != nil {
return err
}
} else if strings.HasPrefix(md.columnTypes[cols[si]], "STRING") {
} else if strings.HasPrefix(md.columnTypes[cols[si]], "STRING") ||
strings.HasPrefix(md.columnTypes[cols[si]], "TEXT") ||
strings.HasPrefix(md.columnTypes[cols[si]], "CHAR") ||
strings.HasPrefix(md.columnTypes[cols[si]], "VARCHAR") ||
strings.HasPrefix(md.columnTypes[cols[si]], `"char"`) {
d = tree.NewDString(string(t))
} else if strings.HasPrefix(md.columnTypes[cols[si]], "DECIMAL") {
} else if strings.HasPrefix(md.columnTypes[cols[si]], "DECIMAL") ||
strings.HasPrefix(md.columnTypes[cols[si]], "NUMERIC") {
d, err = tree.ParseDDecimal(string(t))
if err != nil {
return err
@@ -683,6 +697,61 @@ func dumpTableData(w io.Writer, conn *sqlConn, clusterTS string, bmd basicMetada
return g.Wait()
}

// parseArrayElementTypeString returns a column type given a string
// representation of the type. Used by dump. It only supports those
// type names that can appear immediately before `[]`.
//
// Although there is now mostly just one name for each type, pre-2.1
// some types had multiple names (e.g. FLOAT vs FLOAT8). We support
// both here to ensure `cockroach dump` works across versions.
//
// TODO(knz): This should really interface with the parser for DRY.
// This is currently brittle and flawed, see #28948.
func parseArrayElementTypeString(s string) (coltypes.T, error) {
switch s {
case "BOOL", "BOOLEAN":
return coltypes.Bool, nil
case "INT", "INTEGER":
return coltypes.Int, nil
case "INT2", "SMALLINT":
return coltypes.Int2, nil
case "INT4":
return coltypes.Int4, nil
case "INT8", "BIGINT", "INT64":
return coltypes.Int8, nil
case "FLOAT", "FLOAT8", "DOUBLE PRECISION", "DOUBLE":
return coltypes.Float8, nil
case "REAL", "FLOAT4":
return coltypes.Float4, nil
case "DECIMAL", "NUMERIC":
return coltypes.Decimal, nil
case "TIMESTAMP":
return coltypes.Timestamp, nil
case "TIMESTAMPTZ", "TIMESTAMP WITH TIME ZONE":
return coltypes.TimestampWithTZ, nil
case "INTERVAL":
return coltypes.Interval, nil
case "UUID":
return coltypes.UUID, nil
case "INET":
return coltypes.INet, nil
case "DATE":
return coltypes.Date, nil
case "TIME":
return coltypes.Time, nil
case "STRING", "TEXT", "VARCHAR", "CHAR", `"char"`:
return coltypes.String, nil
case "NAME":
return coltypes.Name, nil
case "BYTES", "BLOB", "BYTEA":
return coltypes.Bytes, nil
case "OID":
return coltypes.Oid, nil
default:
return nil, pgerror.NewErrorf(pgerror.CodeInternalError, "unexpected column type %s", s)
}
}

func writeInserts(w io.Writer, tmd tableMetadata, inserts []string) {
fmt.Fprintf(w, "\nINSERT INTO %s (%s) VALUES", &tmd.name.TableName, tmd.columnNames)
for idx, values := range inserts {
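
As context for the new function above, here is an editor's sketch (not part of
the diff) of how `parseArrayElementTypeString` is consumed on the array path
of `dumpTableData`. It assumes the imports already present in
`pkg/cli/dump.go` and a `*tree.EvalContext` supplied by the caller; the
declared type `"TEXT[]"` and the wire value `"{a,b,c}"` in the example are
hypothetical.

// decodeArrayValue is an illustrative helper, not part of this commit: it
// mirrors the array branch of dumpTableData for a column whose declared type
// (as reported by information_schema.columns) ends in "[]".
func decodeArrayValue(evalCtx *tree.EvalContext, declared, wireValue string) (tree.Datum, error) {
	// Arrays are only observable through the "[]" suffix on the type name.
	elemType, err := parseArrayElementTypeString(strings.TrimSuffix(declared, "[]"))
	if err != nil {
		return nil, err
	}
	// The value itself arrives as text, e.g. "{a,b,c}"; re-parse it with the
	// recovered element type so it can be re-emitted as a typed literal.
	return tree.ParseDArrayFromString(evalCtx, wireValue, elemType)
}

// Example: decodeArrayValue(evalCtx, "TEXT[]", "{a,b,c}") yields a
// three-element *tree.DArray of strings.
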
12 changes: 9 additions & 3 deletions pkg/cli/dump_test.go
@@ -198,7 +198,10 @@ func TestDumpRandom(t *testing.T) {
CREATE TABLE d.t (
rowid int,
i int,
si smallint,
bi bigint,
f float,
fr real,
d date,
m timestamp,
n interval,
@@ -209,7 +212,7 @@ func TestDumpRandom(t *testing.T) {
u uuid,
ip inet,
j json,
PRIMARY KEY (rowid, i, f, d, m, n, o, e, s, b, u, ip)
PRIMARY KEY (rowid, i, si, bi, f, fr, d, m, n, o, e, s, b, u, ip)
);
SET extra_float_digits = 3;
`, nil); err != nil {
@@ -278,7 +281,10 @@ func TestDumpRandom(t *testing.T) {
vals := []driver.Value{
_i,
i,
i & 0x7fff, // si
i, // bi
f,
f, // fr
d,
m,
[]byte(n), // intervals come out as `[]byte`s
@@ -290,14 +296,14 @@ func TestDumpRandom(t *testing.T) {
[]byte(ip.String()),
[]byte(j.String()),
}
if err := conn.Exec("INSERT INTO d.t VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)", vals); err != nil {
if err := conn.Exec("INSERT INTO d.t VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16)", vals); err != nil {
t.Fatal(err)
}
generatedRows = append(generatedRows, vals[1:])
}

check := func(table string) {
q := fmt.Sprintf("SELECT i, f, d, m, n, o, e, s, b, u, ip, j FROM %s ORDER BY rowid", table)
q := fmt.Sprintf("SELECT i, si, bi, f, fr, d, m, n, o, e, s, b, u, ip, j FROM %s ORDER BY rowid", table)
nrows, err := conn.Query(q, nil)
if err != nil {
t.Fatal(err)
40 changes: 0 additions & 40 deletions pkg/sql/sem/tree/parse_array.go
@@ -145,46 +145,6 @@ func (p *parseState) parseElement() error {
return p.result.Append(d)
}

// ArrayElementTypeStringToColType returns a column type given a
// string representation of the type. Used by dump. It only
// supports those type names that can appear immediately before `[]`.
func ArrayElementTypeStringToColType(s string) (coltypes.T, error) {
switch s {
case "BOOL":
return coltypes.Bool, nil
case "INT":
return coltypes.Int, nil
case "FLOAT", "FLOAT8", "DOUBLE PRECISION":
return coltypes.Float8, nil
case "REAL", "FLOAT4":
return coltypes.Float4, nil
case "DECIMAL":
return coltypes.Decimal, nil
case "TIMESTAMP":
return coltypes.Timestamp, nil
case "TIMESTAMPTZ", "TIMESTAMP WITH TIME ZONE":
return coltypes.TimestampWithTZ, nil
case "INTERVAL":
return coltypes.Interval, nil
case "UUID":
return coltypes.UUID, nil
case "INET":
return coltypes.INet, nil
case "DATE":
return coltypes.Date, nil
case "TIME":
return coltypes.Time, nil
case "STRING":
return coltypes.String, nil
case "NAME":
return coltypes.Name, nil
case "BYTES":
return coltypes.Bytes, nil
default:
return nil, pgerror.NewErrorf(pgerror.CodeInternalError, "unexpected column type %s", s)
}
}

// ParseDArrayFromString parses the string-form of constructing arrays, handling
// cases such as `'{1,2,3}'::INT[]`.
func ParseDArrayFromString(evalCtx *EvalContext, s string, t coltypes.T) (*DArray, error) {