From 3701beff64db77573f2fb068440d3c349ce53a51 Mon Sep 17 00:00:00 2001 From: Marcel van Lohuizen Date: Wed, 31 Mar 2021 16:25:55 +0200 Subject: [PATCH] encoding/protobuf/jsonpb: add Rewrite* for interpreting JSON in PB terms This allows code that uses the CUE API to modify an ast.Expr or ast.File to conform to a CUE schema, allowing mappings that Protobuf allows, but that are otherwise not allowed by a strict interpretation of the schema. Note that this assumes that enum integers can be mapped to strings with a corresponding #intValue field. This is not yet set by the proto mapping. Issue #606 Change-Id: I71d7bfa9e69f985c1eaaf1c1e20e5a473b882e70 Reviewed-on: https://cue-review.googlesource.com/c/cue/+/9243 Reviewed-by: CUE cueckoo Reviewed-by: Marcel van Lohuizen --- encoding/protobuf/jsonpb/decoder.go | 325 ++++++++++++++++++ encoding/protobuf/jsonpb/decoder_test.go | 133 +++++++ encoding/protobuf/jsonpb/jsonpb.go | 17 + .../jsonpb/testdata/decoder/base64.txtar | 31 ++ .../jsonpb/testdata/decoder/basic.txtar | 97 ++++++ .../jsonpb/testdata/decoder/enums.txtar | 48 +++ .../jsonpb/testdata/decoder/null.txtar | 155 +++++++++ 7 files changed, 806 insertions(+) create mode 100644 encoding/protobuf/jsonpb/decoder.go create mode 100644 encoding/protobuf/jsonpb/decoder_test.go create mode 100644 encoding/protobuf/jsonpb/jsonpb.go create mode 100644 encoding/protobuf/jsonpb/testdata/decoder/base64.txtar create mode 100644 encoding/protobuf/jsonpb/testdata/decoder/basic.txtar create mode 100644 encoding/protobuf/jsonpb/testdata/decoder/enums.txtar create mode 100644 encoding/protobuf/jsonpb/testdata/decoder/null.txtar diff --git a/encoding/protobuf/jsonpb/decoder.go b/encoding/protobuf/jsonpb/decoder.go new file mode 100644 index 000000000..7b616d37a --- /dev/null +++ b/encoding/protobuf/jsonpb/decoder.go @@ -0,0 +1,325 @@ +// Copyright 2021 CUE Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance 
with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jsonpb
+
+import (
+	"encoding/base64"
+	"strings"
+
+	"cuelang.org/go/cue"
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/ast/astutil"
+	"cuelang.org/go/cue/errors"
+	"cuelang.org/go/cue/literal"
+	"cuelang.org/go/cue/token"
+	"github.com/cockroachdb/apd/v2"
+)
+
+// Option configures a Decoder. There are currently no options.
+type Option func()
+
+// A Decoder interprets CUE expressions as JSON protobuf encodings
+// based on an underlying schema.
+//
+// It bases the mapping on the underlying CUE type, without consulting Protobuf
+// attributes.
+//
+// Mappings per CUE type:
+//   for any CUE type:
+//              null becomes the default or zero value unless the schema allows it.
+//   bytes:     if the expression is a string, it is reinterpreted using a
+//              base64 encoding. Either standard or URL-safe base64 encoding
+//              with or without padding is accepted.
+//   int:       string values are interpreted as integers
+//   float:     string values are interpreted as numbers, and the values "NaN",
+//              "Infinity", and "-Infinity" are allowed and converted to
+//              the corresponding error values.
+//   disjunction of strings:
+//              this is assumed to represent a protobuf enum value. Strings
+//              are left as is. For integers, the disjunction is resolved
+//              by converting it to the string that has a corresponding #intValue
+//              value.
+//   {}:        JSON objects representing any values will be left as is.
+//              If the CUE type corresponding to the URL can be determined within
+//              the module context it will be unified.
+//   time.Time / time.Duration: left as is.
+//   _:         left as is.
+type Decoder struct {
+	schema cue.Value
+}
+
+// NewDecoder creates a Decoder for the given schema.
+func NewDecoder(schema cue.Value, options ...Option) *Decoder {
+	return &Decoder{schema: schema} // options are accepted but not consulted
+}
+
+// RewriteFile modifies file, interpreting it in terms of the given schema
+// according to the protocol buffer to JSON mapping defined in the protocol
+// buffer spec.
+//
+// RewriteFile is idempotent: calling it multiple times on a file gives
+// the same result.
+func (d *Decoder) RewriteFile(file *ast.File) error {
+	var r rewriter
+	r.rewriteDecls(d.schema, file.Decls)
+	return r.errs
+}
+
+// RewriteExpr modifies expr, interpreting it in terms of the given schema
+// according to the protocol buffer to JSON mapping defined in the
+// protocol buffer spec.
+//
+// RewriteExpr is idempotent: calling it multiple times on an expression gives
+// the same result.
+func (d *Decoder) RewriteExpr(expr ast.Expr) (ast.Expr, error) {
+	var r rewriter
+	x := r.rewrite(d.schema, expr)
+	return x, r.errs
+}
+
+// rewriter rewrites AST nodes against a schema, accumulating errors in errs.
+type rewriter struct {
+	errs errors.Error
+}
+
+// addErr appends err to the errors collected so far.
+func (r *rewriter) addErr(err errors.Error) {
+	r.errs = errors.Append(r.errs, err)
+}
+
+// addErrf records a formatted error at p, prefixed with the path of schema.
+func (r *rewriter) addErrf(p token.Pos, schema cue.Value, format string, args ...interface{}) {
+	format = "%s: " + format
+	args = append([]interface{}{schema.Path()}, args...)
+	r.addErr(errors.Newf(p, format, args...))
+}
+
+// rewriteDecls rewrites the values of the string-labeled fields that schema covers.
+func (r *rewriter) rewriteDecls(schema cue.Value, decls []ast.Decl) {
+	for _, f := range decls {
+		field, ok := f.(*ast.Field)
+		if !ok {
+			continue
+		}
+		sel := cue.Label(field.Label)
+		if !sel.IsString() {
+			continue
+		}
+		// Look the field up directly, falling back to the schema's template.
+		v := schema.LookupPath(cue.MakePath(sel))
+		if !v.Exists() {
+			f := schema.Template()
+			if f == nil {
+				continue
+			}
+			v = f(sel.String())
+		}
+		if !v.Exists() {
+			continue // no schema for this field: leave its value untouched
+		}
+
+		field.Value = r.rewrite(v, field.Value)
+	}
+}
+
+// rewrite returns expr adapted to schema: literals are converted in place where
+// possible, and a replacement node receives the metadata of expr via CopyMeta.
+func (r *rewriter) rewrite(schema cue.Value, expr ast.Expr) (x ast.Expr) {
+	defer func() {
+		if expr != x && x != nil {
+			astutil.CopyMeta(x, expr)
+		}
+	}()
+
+	switch x := expr.(type) {
+	case *ast.BasicLit:
+		if x.Kind != token.NULL {
+			break
+		}
+		if schema.IncompleteKind()&cue.NullKind != 0 {
+			break // the schema allows null: keep it
+		}
+		switch v, _ := schema.Default(); {
+		case v.IsConcrete():
+			if x, _ := v.Syntax(cue.Final()).(ast.Expr); x != nil {
+				return x
+			}
+		default: // default value for type
+			if x := zeroValue(schema, x); x != nil {
+				return x
+			}
+		}
+
+	case *ast.StructLit:
+		r.rewriteDecls(schema, x.Elts)
+		return x
+
+	case *ast.ListLit:
+		elem, _ := schema.Elem()
+		iter, _ := schema.List()
+		for i, e := range x.Elts {
+			v := elem
+			if iter.Next() {
+				v = iter.Value()
+			}
+			if !v.Exists() {
+				break // leaves the loop: no schema for this or later elements
+			}
+			x.Elts[i] = r.rewrite(v, e)
+		}
+		return x
+	}
+
+	switch schema.IncompleteKind() {
+	case cue.IntKind, cue.FloatKind, cue.NumberKind:
+		x, q, str := stringValue(expr)
+		if x == nil || !q.IsDouble() {
+			break
+		}
+
+		var info literal.NumInfo
+		if err := literal.ParseNum(str, &info); err != nil {
+			break
+		}
+		x.Value = str
+		x.Kind = token.FLOAT
+		if info.IsInt() {
+			x.Kind = token.INT
+		}
+
+	case cue.BytesKind:
+		x, q, str := stringValue(expr)
+		// Only double-quoted strings hold base64; anything else is left as is.
+		if x == nil || !q.IsDouble() {
+			break
+		}
+		var b []byte
+		var err error
+		for _, enc := range base64Encodings {
+			if b, err = enc.DecodeString(str); err == nil {
+				break
+			}
+		}
+		if err != nil {
+			r.addErrf(expr.Pos(), schema,
+				"failed to decode base64: %v", err)
+			return expr
+		}
+
+		quoter := literal.Bytes
+		if q.IsMulti() {
+			ws := q.Whitespace()
+			tabs := (strings.Count(ws, " ")+3)/4 + strings.Count(ws, "\t")
+			quoter = quoter.WithTabIndent(tabs)
+		}
+		x.Value = quoter.Quote(string(b))
+		return x
+
+	case cue.StringKind:
+		if s, ok := expr.(*ast.BasicLit); ok && s.Kind == token.INT {
+			var info literal.NumInfo
+			if err := literal.ParseNum(s.Value, &info); err != nil || !info.IsInt() {
+				break
+			}
+			var d apd.Decimal
+			if err := info.Decimal(&d); err != nil {
+				break
+			}
+			enum, err := d.Int64()
+			if err != nil {
+				r.addErrf(expr.Pos(), schema, "invalid enum index: %v", err)
+				return expr
+			}
+			op, values := schema.Expr()
+			if op != cue.OrOp {
+				values = []cue.Value{schema} // allow single values.
+			}
+			for _, v := range values {
+				i, err := v.LookupPath(cue.MakePath(cue.Def("#intValue"))).Int64()
+				if err == nil && i == enum {
+					str, err := v.String()
+					if err != nil {
+						r.addErr(errors.Wrapf(err, v.Pos(), "invalid string enum"))
+						return expr
+					}
+					s.Kind = token.STRING
+					s.Value = literal.String.Quote(str)
+					return s
+				}
+			}
+			r.addErrf(expr.Pos(), schema,
+				"could not locate integer enum value %d", enum)
+		}
+
+	case cue.StructKind, cue.TopKind:
+		// TODO: Detect and mix in type.
+	}
+	return expr
+}
+
+// zeroValue returns a zero value for the kind of v, or nil for other kinds.
+func zeroValue(v cue.Value, x *ast.BasicLit) ast.Expr {
+	switch v.IncompleteKind() {
+	case cue.StringKind:
+		x.Kind = token.STRING
+		x.Value = `""`
+
+	case cue.BytesKind:
+		x.Kind = token.STRING
+		x.Value = `''`
+
+	case cue.BoolKind:
+		x.Kind = token.FALSE
+		x.Value = "false"
+
+	case cue.NumberKind, cue.IntKind, cue.FloatKind:
+		x.Kind = token.INT
+		x.Value = "0"
+
+	case cue.StructKind:
+		return ast.NewStruct()
+
+	case cue.ListKind:
+		return &ast.ListLit{}
+
+	default:
+		return nil
+	}
+	return x // scalar kinds reuse the null literal that was passed in
+}
+
+// stringValue unquotes x; b is nil if x is not a well-formed string literal.
+func stringValue(x ast.Expr) (b *ast.BasicLit, q literal.QuoteInfo, str string) {
+	b, ok := x.(*ast.BasicLit)
+	if !ok || b.Kind != token.STRING {
+		return nil, q, ""
+	}
+	q, p, _, err := literal.ParseQuotes(b.Value, b.Value)
+	if err != nil {
+		return nil, q, ""
+	}
+	str, err = q.Unquote(b.Value[p:])
+	if err != nil {
+		return nil, q, ""
+	}
+	return b, q, str
+}
+
+// These are all the allowed base64 encodings.
+var base64Encodings = []base64.Encoding{
+	*base64.StdEncoding,
+	*base64.URLEncoding,
+	*base64.RawStdEncoding,
+	*base64.RawURLEncoding,
+}
diff --git a/encoding/protobuf/jsonpb/decoder_test.go b/encoding/protobuf/jsonpb/decoder_test.go
new file mode 100644
index 000000000..779c5efc3
--- /dev/null
+++ b/encoding/protobuf/jsonpb/decoder_test.go
@@ -0,0 +1,133 @@
+// Copyright 2021 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package jsonpb
+
+import (
+	"strings"
+	"testing"
+
+	"cuelang.org/go/cue"
+	"cuelang.org/go/cue/ast"
+	"cuelang.org/go/cue/ast/astutil"
+	"cuelang.org/go/cue/errors"
+	"cuelang.org/go/cue/format"
+	"cuelang.org/go/cue/parser"
+	"cuelang.org/go/encoding/json"
+	"cuelang.org/go/encoding/yaml"
+	"cuelang.org/go/internal/cuetest"
+	"cuelang.org/go/internal/cuetxtar"
+)
+
+// TestParse rewrites each input file of a txtar archive against its schema.cue.
+func TestParse(t *testing.T) {
+	test := cuetxtar.TxTarTest{
+		Root:   "./testdata/decoder",
+		Name:   "jsonpb",
+		Update: cuetest.UpdateGoldenFiles,
+	}
+
+	r := cue.Runtime{}
+	test.Run(t, func(t *cuetxtar.Test) {
+		// TODO: use high-level API.
+		var schema cue.Value
+		var file *ast.File
+		for _, f := range t.Archive.Files {
+			switch {
+			case f.Name == "schema.cue":
+				inst, err := r.Compile(f.Name, f.Data)
+				if err != nil {
+					t.WriteErrors(errors.Promote(err, "test"))
+					return
+				}
+				schema = inst.Value()
+				continue // the schema itself is not rewritten
+
+			case strings.HasPrefix(f.Name, "out/"):
+				continue // golden output, not an input
+
+			case strings.HasSuffix(f.Name, ".cue"):
+				f, err := parser.ParseFile(f.Name, f.Data, parser.ParseComments)
+				if err != nil {
+					t.Fatal(err)
+				}
+				file = f
+
+			case strings.HasSuffix(f.Name, ".json"):
+				x, err := json.Extract(f.Name, f.Data)
+				if err != nil {
+					t.Fatal(err)
+				}
+				file, err = astutil.ToFile(x)
+				if err != nil {
+					t.Fatal(err)
+				}
+
+			case strings.HasSuffix(f.Name, ".yaml"):
+				f, err := yaml.Extract(f.Name, f.Data)
+				if err != nil {
+					t.Fatal(err)
+				}
+				file = f
+
+			default:
+				continue // unknown file type: do not reuse a stale or nil file
+			}
+
+			w := t.Writer(f.Name)
+			err := NewDecoder(schema).RewriteFile(file)
+			if err != nil {
+				errors.Print(w, err, nil)
+				continue
+			}
+			b, err := format.Node(file)
+			if err != nil {
+				t.Fatal(err)
+			}
+			_, _ = w.Write(b)
+		}
+	})
+}
+
+// For debugging purposes: DO NOT REMOVE.
+func TestX(t *testing.T) {
+	const schema = `
+
+	`
+	const data = `
+`
+	if strings.TrimSpace(data) == "" {
+		t.Skip() // no data filled in: nothing to debug
+	}
+	var r cue.Runtime
+	inst, err := r.Compile("schema", schema)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	file, err := parser.ParseFile("data", data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := NewDecoder(inst.Value()).RewriteFile(file); err != nil {
+		t.Fatal(err)
+	}
+
+	b, err := format.Node(file)
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Error(string(b)) // reports the rewritten file as a failure so it is printed
+}
diff --git a/encoding/protobuf/jsonpb/jsonpb.go b/encoding/protobuf/jsonpb/jsonpb.go
new file mode 100644
index 000000000..df07a380d
--- /dev/null
+++ b/encoding/protobuf/jsonpb/jsonpb.go
@@ -0,0 +1,17 @@
+// Copyright 2021 CUE Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package jsonpb rewrites a CUE expression based upon the Protobuf
+// interpretation of JSON.
+package jsonpb
diff --git a/encoding/protobuf/jsonpb/testdata/decoder/base64.txtar b/encoding/protobuf/jsonpb/testdata/decoder/base64.txtar
new file mode 100644
index 000000000..7476699d4
--- /dev/null
+++ b/encoding/protobuf/jsonpb/testdata/decoder/base64.txtar
@@ -0,0 +1,31 @@
+-- schema.cue --
+b: [string]: bytes
+
+-- std.cue --
+b: hello: "SGVsbG8sIOS4lueVjA=="
+b: noPad: "SGVsbG8sIOS4lueVjA"
+b: bar: "c29tZSBkYXRhIHdpdGggACBhbmQg77u/"
+
+// A large one-line text.
+b: multi: """ + TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz + IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg + dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu + dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo + ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4= + """ + +-- url.cue -- +b: bar: "c29tZSBkYXRhIHdpdGggACBhbmQg77u_" + +-- out/jsonpb/std.cue -- +b: hello: 'Hello, 世界' +b: noPad: 'Hello, 世界' +b: bar: 'some data with \x00 and \ufeff' + +// A large one-line text. +b: multi: ''' + Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure. + ''' +-- out/jsonpb/url.cue -- +b: bar: 'some data with \x00 and \ufeff' diff --git a/encoding/protobuf/jsonpb/testdata/decoder/basic.txtar b/encoding/protobuf/jsonpb/testdata/decoder/basic.txtar new file mode 100644 index 000000000..cf8fa7f49 --- /dev/null +++ b/encoding/protobuf/jsonpb/testdata/decoder/basic.txtar @@ -0,0 +1,97 @@ +-- schema.cue -- +a: int + +strings: { + c: string + d: "foo" | "bar" +} + +lists: { + e: [...int] + f: [int, int] + g: [int, int, ...int] + h: [int, int] +} + +structs: [string]: { + a: int +} + +-- data.json -- +{ + "a": "44", + "strings": { + "c": "cc", + "d": "foo" + }, + "lists": { + "e": ["1"], + "f": ["2"], + "g": ["3", "4", "5"], + "h": ["3", "4", "5"] + }, + "structs": { + "field": { + "a": "1", + "b": "2" + } + }, + "tail": {} +} +-- data.cue -- +a: "44" +strings: { + c: "cc" + d: "foo" +} +lists: { + e: ["1"] + f: ["2"] + g: ["3", "4", "5"] + h: ["3", "4", "5"] // Last element should not be rewritten! 
+}, +structs: { + field: { + a: "1" + b: "2" + } +} +tail: {} +-- out/jsonpb/data.json -- +a: 44 +strings: { + c: "cc" + d: "foo" +} +lists: { + e: [1] + f: [2] + g: [3, 4, 5] + h: [3, 4, "5"] +} +structs: { + field: { + a: 1 + b: "2" + } +} +tail: {} +-- out/jsonpb/data.cue -- +a: 44 +strings: { + c: "cc" + d: "foo" +} +lists: { + e: [1] + f: [2] + g: [3, 4, 5] + h: [3, 4, "5"] // Last element should not be rewritten! +} +structs: { + field: { + a: 1 + b: "2" + } +} +tail: {} diff --git a/encoding/protobuf/jsonpb/testdata/decoder/enums.txtar b/encoding/protobuf/jsonpb/testdata/decoder/enums.txtar new file mode 100644 index 000000000..5c4d1a2b6 --- /dev/null +++ b/encoding/protobuf/jsonpb/testdata/decoder/enums.txtar @@ -0,0 +1,48 @@ +-- schema.cue -- +enum: [string]: { + "foo" + #intValue: 1 +} | { + "bar" + #intValue: 2 +} + +singleEnum: { "single", #intValue: 1 } + +badEnum: { string, #intValue: 1 } | { "two", #intValue: 2 } + +-- data.cue -- +enum: asIs: "foo" +enum: asIsUnknown: "foobar" + +enum: numExistFoo: 1 +enum: numExistBar: 2 + +singleEnum: 1 + +-- errors.cue -- +enum: numNotExists: 3 + +enum: numNotExists: 4 + +enum: tooLarge: 4_111_222_333_444_555_666_777_888_999 + +badEnum: 1 + +-- out/jsonpb/data.cue -- +enum: asIs: "foo" +enum: asIsUnknown: "foobar" + +enum: numExistFoo: "foo" +enum: numExistBar: "bar" + +singleEnum: "single" +-- out/jsonpb/errors.cue -- +enum.numNotExists: could not locate integer enum value 3: + errors.cue:1:21 +enum.numNotExists: could not locate integer enum value 4: + errors.cue:3:21 +enum.tooLarge: invalid enum index: 4111222333444555666777888999: greater than max int64: + errors.cue:5:17 +badEnum: invalid string enum: non-concrete value string: + schema.cue:11:10 diff --git a/encoding/protobuf/jsonpb/testdata/decoder/null.txtar b/encoding/protobuf/jsonpb/testdata/decoder/null.txtar new file mode 100644 index 000000000..89f1e6092 --- /dev/null +++ b/encoding/protobuf/jsonpb/testdata/decoder/null.txtar @@ -0,0 +1,155 @@ +-- 
schema.cue -- +a0: int +a1: 1 | *2 + +a2: string +a3: "a" | *"b" + +a4: bytes +a5: *'a' | 'b' + +a6: [...int] +a7: *[0] | [...int] + +a8: bool +a9: *true | false + +a10: null + +a11: null +a12: null + +-- data.yaml -- +# comment a0 +a0: null + +# comment a1 +a1: null + +# comment a2 +a2: null + +# comment a3 +a3: null + +# comment a4 +a4: null + +# comment a5 +a5: null + +# comment a6 +a6: null + +# comment a7 +a7: null + +# comment a8 +a8: null + +# comment a9 +a9: null + +# comment a10 +a10: null + +-- data.cue -- +// comment a0 +a0: null + +// comment a1 +a1: null + +// comment a2 +a2: null + +// comment a3 +a3: null + +// comment a4 +a4: null + +// comment a5 +a5: null + +// comment a6 +a6: null + +// comment a7 +a7: null + +// comment a8 +a8: null + +// comment a9 +a9: null + +// comment a10 +a10: null + +-- out/jsonpb/data.yaml -- + // comment a0 +a0: 0 + +// comment a1 +a1: 2 + +// comment a2 +a2: "" + +// comment a3 +a3: "b" + +// comment a4 +a4: '' + +// comment a5 +a5: 'a' + +// comment a6 +a6: [] + +// comment a7 +a7: [0] + +// comment a8 +a8: false + +// comment a9 +a9: true + +// comment a10 +a10: null +-- out/jsonpb/data.cue -- + // comment a0 +a0: 0 + +// comment a1 +a1: 2 + +// comment a2 +a2: "" + +// comment a3 +a3: "b" + +// comment a4 +a4: '' + +// comment a5 +a5: 'a' + +// comment a6 +a6: [] + +// comment a7 +a7: [0] + +// comment a8 +a8: false + +// comment a9 +a9: true + +// comment a10 +a10: null