From 54eafe07a1031752914f8daa8ba2a6619c72a50b Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Wed, 17 Feb 2021 23:17:08 -0500 Subject: [PATCH] New support to upload a directory of JSON schema file(s) via "opa eval --schema". Directory can contain schema file(s) for policy input document(s), and schema file(s) for contextual data document(s). These schema files are used then to improve static type checking and to get more precise error reports as you develop Rego code. Also, there is new support for adding annotations on Rules to specify the schemas to be used specifically for type checking the expressions within the scope of that Rule. It helps address issues with schema overloading, and provides even more precise type error reports for a Rego developer. Also, added support for annotation processing when loading via bundles. Co-authored-by: @vazirim Mandana Vaziri mvaziri@us.ibm.com Co-authored-by: @aavarghese Ansu Varghese avarghese@us.ibm.com Co-authored-by: @tsandall Torin Sandall torinsandall@gmail.com Signed-off-by: Mandana Vaziri --- ast/check.go | 138 +++++++++ ast/check_test.go | 668 ++++++++++++++++++++++++++++++++++++++++ ast/compile.go | 62 ++-- ast/compile_test.go | 8 +- ast/env.go | 28 +- ast/parser.go | 101 +++++- ast/parser_ext.go | 36 ++- ast/parser_test.go | 342 ++++++++++++++++++++ ast/policy.go | 63 +++- bundle/bundle.go | 9 +- cmd/eval.go | 102 +++++- cmd/eval_test.go | 47 ++- cmd/flags.go | 2 +- docs/content/schemas.md | 274 ++++++++++++++-- go.mod | 2 + go.sum | 6 + loader/loader.go | 43 ++- rego/rego.go | 14 +- rego/rego_test.go | 5 +- types/types.go | 10 + 20 files changed, 1860 insertions(+), 100 deletions(-) diff --git a/ast/check.go b/ast/check.go index b77a291a27..124f76dadb 100644 --- a/ast/check.go +++ b/ast/check.go @@ -146,7 +146,145 @@ func (tc *typeChecker) checkLanguageBuiltins(env *TypeEnv, builtins map[string]* return env } +// override takes a type t and returns a type obtained from t where the path represented by ref 
within it has type o (overriding the original type of that path) +func override(ref Ref, t types.Type, o types.Type, rule *Rule) (types.Type, *Error) { + newStaticProps := []*types.StaticProperty{} + obj, ok := t.(*types.Object) + if !ok { + newType, err := getObjectType(ref, o, rule, types.NewDynamicProperty(types.A, types.A)) + if err != nil { + return nil, err + } + return newType, nil + } + found := false + if ok { + staticProps := obj.StaticProperties() + for _, prop := range staticProps { + valueCopy := prop.Value + key, err := InterfaceToValue(prop.Key) + if err != nil { + return nil, NewError(TypeErr, rule.Location, "unexpected error in override: %s", err.Error()) + } + if len(ref) > 0 && ref[0].Value.Compare(key) == 0 { + found = true + if len(ref) == 1 { + valueCopy = o + } else { + newVal, err := override(ref[1:], valueCopy, o, rule) + if err != nil { + return nil, err + } + valueCopy = newVal + } + } + newStaticProps = append(newStaticProps, types.NewStaticProperty(prop.Key, valueCopy)) + } + } + + // ref[0] is not a top-level key in staticProps, so it must be added + if !found { + newType, err := getObjectType(ref, o, rule, obj.DynamicProperties()) + if err != nil { + return nil, err + } + newStaticProps = append(newStaticProps, newType.StaticProperties()...) 
+ } + return types.NewObject(newStaticProps, obj.DynamicProperties()), nil +} + +func getKeys(ref Ref, rule *Rule) ([]interface{}, *Error) { + keys := []interface{}{} + for _, refElem := range ref { + key, err := JSON(refElem.Value) + if err != nil { + return nil, NewError(TypeErr, rule.Location, "error getting key from value: %s", err.Error()) + } + keys = append(keys, key) + } + return keys, nil +} + +func getObjectTypeRec(keys []interface{}, o types.Type, d *types.DynamicProperty) *types.Object { + if len(keys) == 1 { + staticProps := []*types.StaticProperty{types.NewStaticProperty(keys[0], o)} + return types.NewObject(staticProps, d) + } + + staticProps := []*types.StaticProperty{types.NewStaticProperty(keys[0], getObjectTypeRec(keys[1:], o, d))} + return types.NewObject(staticProps, d) +} + +func getObjectType(ref Ref, o types.Type, rule *Rule, d *types.DynamicProperty) (*types.Object, *Error) { + keys, err := getKeys(ref, rule) + if err != nil { + return nil, err + } + return getObjectTypeRec(keys, o, d), nil +} + +func (tc *typeChecker) getRuleAnnotation(rule *Rule) (sannots []SchemaAnnotation) { + for _, annot := range rule.Module.Annotation { + schemaAnnots, ok := annot.(*SchemaAnnotations) + if ok && schemaAnnots.Scope == ruleScope && schemaAnnots.Rule == rule { + return schemaAnnots.SchemaAnnotation + } + } + return nil +} + +// Annotations must immediately precede the rule definition +func (tc *typeChecker) processAnnotation(annot SchemaAnnotation, env *TypeEnv, rule *Rule) (Ref, types.Type, *Error) { + if env.schemaSet == nil || env.schemaSet.ByPath == nil { + return nil, nil, NewError(TypeErr, rule.Location, "schemas need to be supplied for the annotation: %s", annot.Schema) + } + schemaRef, err := ParseRef(annot.Schema) + if err != nil { + return nil, nil, NewError(TypeErr, rule.Location, "schema is not well formed in annotation: %s", annot.Schema) + } + schema, ok := env.schemaSet.ByPath.Get(schemaRef) + if !ok { + return nil, nil, NewError(TypeErr, 
rule.Location, "schema does not exist for given path in annotation: %s", schemaRef.String()) + } + newType, err := setTypesWithSchema(schema) + if err != nil { + return nil, nil, NewError(TypeErr, rule.Location, err.Error()) + } + ref, err := ParseRef(annot.Path) + if err != nil { + return nil, nil, NewError(TypeErr, rule.Location, err.Error()) + } + + return ref, newType, nil +} + func (tc *typeChecker) checkRule(env *TypeEnv, rule *Rule) { + if rule.Module.Annotation != nil { + schemaAnnots := tc.getRuleAnnotation(rule) + if schemaAnnots != nil { + for _, schemaAnnot := range schemaAnnots { + ref, refType, err := tc.processAnnotation(schemaAnnot, env, rule) + if err != nil { + tc.err([]*Error{err}) + continue + } + prefixRef, t := env.GetPrefix(ref) + env = env.wrap() + if t == nil { + env.tree.Put(ref, refType) + } else if len(prefixRef) == len(ref) { + env.tree.Put(ref, refType) + } else { + newType, err := override(ref[len(prefixRef):], t, refType, rule) + if err != nil { + tc.err([]*Error{err}) + continue + } + env.tree.Put(prefixRef, newType) + } + } + } + } cpy, err := tc.CheckBody(env, rule.Body) diff --git a/ast/check_test.go b/ast/check_test.go index 587c64a511..b905654839 100644 --- a/ast/check_test.go +++ b/ast/check_test.go @@ -1215,3 +1215,671 @@ func newTestEnv(rs []string) *TypeEnv { return env } + +const inputSchema = `{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "http://example.com/example.json", + "type": "object", + "title": "The root schema", + "description": "The root schema comprises the entire JSON document.", + "default": {}, + "examples": [ + { + "user": "alice", + "operation": "write" + } + ], + "required": [ + "user", + "operation" + ], + "properties": { + "user": { + "$id": "#/properties/user", + "type": "string", + "title": "The user schema", + "description": "An explanation about the purpose of this instance.", + "default": "", + "examples": [ + "alice" + ] + }, + "operation": { + "$id": "#/properties/operation", 
+ "type": "string", + "title": "The operation schema", + "description": "An explanation about the purpose of this instance.", + "default": "", + "examples": [ + "write" + ] + } + }, + "additionalProperties": true +}` + +const inputSchema2 = `{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "http://example.com/example.json", + "type": "object", + "title": "The root schema", + "description": "The root schema comprises the entire JSON document.", + "default": {}, + "examples": [ + { + "operation": "read" + } + ], + "required": [ + "operation" + ], + "properties": { + "operation": { + "$id": "#/properties/operation", + "type": "string", + "title": "The operation schema", + "description": "An explanation about the purpose of this instance.", + "default": "", + "examples": [ + "read" + ] + } + }, + "additionalProperties": true +}` + +const dataSchema = `{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "http://example.com/example.json", + "type": "object", + "title": "The root schema", + "description": "The root schema comprises the entire JSON document.", + "default": {}, + "examples": [ + { + "alice": [ + "read", + "write" + ], + "bob": [ + "read" + ] + } + ], + "required": [ + "alice", + "bob" + ], + "properties": { + "alice": { + "$id": "#/properties/alice", + "type": "array", + "title": "The alice schema", + "description": "An explanation about the purpose of this instance.", + "default": [], + "examples": [ + [ + "read", + "write" + ] + ], + "additionalItems": false, + "items": { + "$id": "#/properties/alice/items", + "type": "string", + "title": "The items schema", + "description": "An explanation about the purpose of this instance.", + "default": "", + "examples": [ + [ + "read", + "write" + ] + ] + } + }, + "bob": { + "$id": "#/properties/bob", + "type": "array", + "title": "The bob schema", + "description": "An explanation about the purpose of this instance.", + "default": [], + "examples": [ + [ + "read" + ] + ], + 
"additionalItems": false, + "items": { + "$id": "#/properties/bob/items", + "type": "string", + "title": "The items schema", + "description": "An explanation about the purpose of this instance.", + "default": "", + "examples": [ + [ + "read" + ] + ] + } + } + }, + "additionalProperties": true +}` + +func TestCheckAnnotationRules(t *testing.T) { + + var ischema interface{} + _ = util.Unmarshal([]byte(inputSchema), &ischema) + + var ischema2 interface{} + _ = util.Unmarshal([]byte(inputSchema2), &ischema2) + + var dschema interface{} + _ = util.Unmarshal([]byte(dataSchema), &dschema) + + module1 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - data.acl: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation +}` + + module2 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - input: schema["whocan-input-schema"] +# - data.acl: schema["acl-schema"] +whocan[user] { + access = acl[user] + access[_] == input.operation +}` + + module3 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - input: schema["whocan-input-schema"] +# - data.acl: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation +}` + + module4 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["badpath"] +whocan[user] { + access = acl[user] + access[_] == input.operation +}` + + module5 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - badref: schema["whocan-input-schema"] +whocan[user] { + access = acl[user] + access[_] == input.operation +}` + + module6 := ` +package policy 
+ +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - data/acl: schema/acl-schema +whocan[user] { + access = acl[user] + access[_] == input.operation +}` + + module7 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input= schema["whocan-input-schema"] +whocan[user] { + access = acl[user] + access[_] == input.operation +}` + + module8 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - data.acl: schema["acl-schema"] +# - input.apple.orange: schema["input"] +allow { + access = data.acl[input.user] + access[_] == input.operation + input.apple.banana +}` + + module9 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - data.acl: schema["acl-schema"] +# - input.apple.orange: schema["input"] +# - input.apple.orange.banana: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation + input.apple.orange.banana +}` + + module10 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - input.apple.orange: schema["input"] +# - input.apple.orange.banana: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation + input.apple.orange.banana.fruit +}` + + module11 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - input.apple.orange: schema["input"] +# - input.apple.orange: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation + input.apple.orange.bob + input.apple.orange.user +}` + + module12 := ` +package policy + +import data.acl +import input + +default 
allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - input: schema["acl-schema"] +allow { + access = data.acl[input.user] +}` + + module13 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - input.apple["orange"]: schema["input"] +allow { + access = data.acl[input.user] + input.apple.orange.fruit +}` + + module14 := ` +package policy + +import data.acl +import input + +# METADATA +# scope: rule +# schemas: +# - input.request.object: schema["acl-schema"] +deny[msg] { + input.request.kind.kind == "Pod" + image := input.request.object.spec.typo.containers[_].image + not startswith(image, "hooli.com/") +}` + + module15 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - data.acl: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation + input.apple.orange.banana +}` + + module16 := ` +package policy + +import data.acl +import input + +# METADATA +# scope: rule +# schemas: +# - data.acl: schema["acl-schema"] +deny[msg] { + input.request.kind.kinds == "Pod" + image := input.request.object.spec.containers[_].image + not startswith(image, "hooli.com/") + data.blah +}` + + module17 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["acl-schema"] +allow { + input.alice +} + +deny[msg] { + input.foo +}` + + module18 := ` +package policy + +import data.acl +import input + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - input.apple.banana: schema["input"] +deny[msg] { + input.apple.banana +} + +deny1[msg] { + input.apple.banana.foo +}` + + module19 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - data.acl: schema["acl-schema"] +# - data.acl.foo: 
schema["input"] +allow { + access = data.acl[input.user] + access[_] == input.operation + input.apple.orange.banana + data.acl.foo.blah +}` + + module20 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - data.acl: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation + data.acl.foo +}` + + module21 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - data.acl: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation +} + +# METADATA for whocan rule +# scope: rule +# schemas: +# - input: schema["whocan-input-schema"] +# - data.acl: schema["acl-schema"] +whocan[user] { + access = acl[user] + access[_] == input.operation +}` + + module22 := ` +package policy + +import data.acl +import input + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - data.acl: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation + data.foo + data.acl.foo +} + +# METADATA for whocan rule +# scope: rule +# schemas: +# - input: schema["whocan-input-schema"] +# - data.acl: schema["acl-schema"] +whocan[user] { + access = acl[user] + access[_] == input.operation.foo + +}` + + schemaSet := NewSchemaSet() + schemaSet.ByPath.Put(MustParseRef("schema.input"), ischema) + schemaSet.ByPath.Put(MustParseRef(`schema["whocan-input-schema"]`), ischema2) + schemaSet.ByPath.Put(MustParseRef(`schema["acl-schema"]`), dschema) + + tests := map[string]struct { + module string + schemaSet *SchemaSet + err string + }{ + "data and input annotations": {module: module1, schemaSet: schemaSet}, + "correct data override": {module: module2, schemaSet: schemaSet}, + "incorrect data override": {module: module3, schemaSet: schemaSet, err: "undefined ref: input.user"}, + 
"schema not exist in annotation path": {module: module4, schemaSet: schemaSet, err: "schema does not exist for given path in annotation"}, + "non ref in annotation": {module: module5, schemaSet: schemaSet, err: "expected ref but got"}, + "Ill-structured annotation with bad path": {module: module6, schemaSet: schemaSet, err: "schema is not well formed in annotation"}, + "Ill-structured (invalid) annotation": {module: module7, schemaSet: schemaSet, err: "unable to unmarshall the metadata yaml in comment"}, + "empty schema set": {module: module1, schemaSet: nil, err: "schemas need to be supplied for the annotation"}, + "overriding ref with length greater than one and not existing": {module: module8, schemaSet: schemaSet, err: "undefined ref: input.apple.banana"}, + "overriding ref with length greater than one and existing prefix": {module: module9, schemaSet: schemaSet}, + "overriding ref with length greater than one and existing prefix with type error": {module: module10, schemaSet: schemaSet, err: "undefined ref: input.apple.orange.banana.fruit"}, + "overriding ref with length greater than one and existing ref": {module: module11, schemaSet: schemaSet, err: "undefined ref: input.apple.orange.user"}, + "overriding ref of size one": {module: module12, schemaSet: schemaSet, err: "undefined ref: input.user"}, + "overriding annotation written with brackets": {module: module13, schemaSet: schemaSet, err: "undefined ref: input.apple.orange.fruit"}, + "overriding strict": {module: module14, schemaSet: schemaSet, err: "undefined ref: input.request.object.spec.typo"}, + "data annotation but no input schema": {module: module15, schemaSet: schemaSet}, + "data schema annotation does not overly restrict data expression": {module: module16, schemaSet: schemaSet}, + "correct defer annotation on another rule has no effect base case": {module: module17, schemaSet: schemaSet}, + "correct defer annotation on another rule has no effect": {module: module18, schemaSet: schemaSet}, + 
"overriding ref with data prefix": {module: module19, schemaSet: schemaSet, err: "data.acl.foo.blah"}, + "data annotation type error": {module: module20, schemaSet: schemaSet, err: "data.acl.foo"}, + "more than one rule with metadata": {module: module21, schemaSet: schemaSet}, + "more than one rule with metadata with type error": {module: module22, schemaSet: schemaSet, err: "undefined ref"}, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + mod, err := ParseModuleWithOpts("test.rego", tc.module, ParserOptions{ + ProcessAnnotation: true, + }) + if err != nil { + if !strings.Contains(err.Error(), tc.err) { + t.Fatalf("Unexpected parse module error when processing annotations: %v", err) + } + return + } + + var elems []util.T + for _, rule := range mod.Rules { + elems = append(elems, rule) + for next := rule.Else; next != nil; next = next.Else { + next.Module = mod + elems = append(elems, next) + } + } + + oldTypeEnv := newTypeChecker().checkLanguageBuiltins(nil, BuiltinMap).WithSchemas(tc.schemaSet) + typeenv, errors := newTypeChecker().CheckTypes(oldTypeEnv, elems) + if len(errors) > 0 { + for _, e := range errors { + if tc.err == "" || !strings.Contains(e.Error(), tc.err) { + t.Fatalf("Unexpected check rule error when processing annotations: %v", e) + } + } + return + } else if tc.err != "" { + t.Fatalf("Expected err: %v but no error from check types", tc.err) + } + + if oldTypeEnv.tree.children != nil && typeenv.next.tree.children != nil && (typeenv.next.tree.children.Len() != oldTypeEnv.tree.children.Len()) { + t.Fatalf("Unexpected type env") + } + + }) + } +} diff --git a/ast/compile.go b/ast/compile.go index 3f9b9ea290..998e25cd76 100644 --- a/ast/compile.go +++ b/ast/compile.go @@ -105,13 +105,27 @@ type Compiler struct { unsafeBuiltinsMap map[string]struct{} // user-supplied set of unsafe built-ins functions to block (deprecated: use capabilities) comprehensionIndices map[*Term]*ComprehensionIndex // comprehension key index 
initialized bool // indicates if init() has been called - schemaSet *SchemaSet - debug debug.Debug // emits debug information produced during compilation + debug debug.Debug // emits debug information produced during compilation + schemaSet *SchemaSet // user-supplied schemas for input and data documents } // SchemaSet holds a map from a path to a schema type SchemaSet struct { - ByPath map[string]interface{} + ByPath *util.HashMap +} + +// NewSchemaSet returns an empty SchemaSet. +func NewSchemaSet() *SchemaSet { + + eqFunc := func(a, b util.T) bool { + return a.(Ref).Equal(b.(Ref)) + } + + hashFunc := func(x util.T) int { return x.(Ref).Hash() } + + return &SchemaSet{ + ByPath: util.NewHashMap(eqFunc, hashFunc), + } } // CompilerStage defines the interface for stages in the compiler. @@ -955,30 +969,12 @@ func setTypesWithSchema(schema interface{}) (types.Type, error) { return newtype, nil } -func (c *Compiler) setInputType() { - if c.schemaSet != nil { - if c.schemaSet.ByPath != nil { - schema := c.schemaSet.ByPath["input"] - if schema != nil { - newtype, err := setTypesWithSchema(schema) - if err != nil { - c.err(NewError(TypeErr, nil, err.Error())) - } - c.TypeEnv.tree.PutOne(VarTerm("input").Value, newtype) - } - } - } -} - // checkTypes runs the type checker on all rules. The type checker builds a // TypeEnv that is stored on the compiler. func (c *Compiler) checkTypes() { // Recursion is caught in earlier step, so this cannot fail. 
sorted, _ := c.Graph.Sort() checker := newTypeChecker().WithVarRewriter(rewriteVarsInRef(c.RewrittenVars)) - - c.setInputType() - env, errs := checker.CheckTypes(c.TypeEnv, sorted) for _, err := range errs { c.err(err) @@ -1055,9 +1051,30 @@ func (c *Compiler) init() { tc := newTypeChecker() c.TypeEnv = tc.checkLanguageBuiltins(nil, c.builtins) + c.setSchemas() + c.initialized = true } +func (c *Compiler) setSchemas() { + if c.schemaSet != nil { + if c.schemaSet.ByPath != nil { + // First, set the schemaSet in the type environment + c.TypeEnv.WithSchemas(c.schemaSet) + + // Second, set the schema for the input globally + schema, ok := c.schemaSet.ByPath.Get(InputRootRef) + if ok { + newtype, err := setTypesWithSchema(schema) + if err != nil { + c.err(NewError(TypeErr, nil, err.Error())) + } + c.TypeEnv.tree.PutOne(VarTerm("input").Value, newtype) + } + } + } +} + func (c *Compiler) err(err *Error) { if c.maxErrs > 0 && len(c.Errors) >= c.maxErrs { c.Errors = append(c.Errors, errLimitReached) @@ -1672,9 +1689,6 @@ func (qc *queryCompiler) checkSafety(_ *QueryContext, body Body) (Body, error) { func (qc *queryCompiler) checkTypes(qctx *QueryContext, body Body) (Body, error) { var errs Errors checker := newTypeChecker().WithVarRewriter(rewriteVarsInRef(qc.rewritten, qc.compiler.RewrittenVars)) - - qc.compiler.setInputType() - qc.typeEnv, errs = checker.CheckBody(qc.compiler.TypeEnv, body) if len(errs) > 0 { return nil, errs diff --git a/ast/compile_test.go b/ast/compile_test.go index 70704cbb41..deff492092 100644 --- a/ast/compile_test.go +++ b/ast/compile_test.go @@ -868,7 +868,8 @@ func TestCompilerCheckTypesWithSchema(t *testing.T) { if err != nil { t.Fatal("Unexpected error:", err) } - schemaSet := &SchemaSet{ByPath: map[string]interface{}{"input": schema}} + schemaSet := NewSchemaSet() + schemaSet.ByPath.Put(InputRootRef, schema) c.WithSchemas(schemaSet) compileStages(c, c.checkTypes) assertNotFailed(t, c) @@ -4433,7 +4434,8 @@ func 
TestParseSchemaWithSchemaBadSchema(t *testing.T) { func TestWithSchema(t *testing.T) { c := NewCompiler() - schemaSet := &SchemaSet{ByPath: map[string]interface{}{"input": objectSchema}} + schemaSet := NewSchemaSet() + schemaSet.ByPath.Put(InputRootRef, objectSchema) c.WithSchemas(schemaSet) if c.schemaSet == nil { t.Fatalf("WithSchema did not set the schema correctly in the compiler") @@ -4586,7 +4588,7 @@ const refSchema = ` "null" ] }, - + "kind": { "description": "Kind is a string value representing the REST resource this object represents. Servers may infer this from the endpoint the client submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds", "type": [ diff --git a/ast/env.go b/ast/env.go index ab4f0e688f..6a19b07cfb 100644 --- a/ast/env.go +++ b/ast/env.go @@ -11,8 +11,9 @@ import ( // TypeEnv contains type info for static analysis such as type checking. type TypeEnv struct { - tree *typeTreeNode - next *TypeEnv + tree *typeTreeNode + next *TypeEnv + schemaSet *SchemaSet } // NewTypeEnv returns an empty TypeEnv. @@ -22,6 +23,29 @@ func NewTypeEnv() *TypeEnv { } } +// WithSchemas sets the user-provided schemas +func (env *TypeEnv) WithSchemas(schemas *SchemaSet) *TypeEnv { + env.schemaSet = schemas + return env +} + +// GetPrefix returns the shortest prefix of ref that exists in env +func (env *TypeEnv) GetPrefix(ref Ref) (Ref, types.Type) { + if len(ref) == 1 { + t := env.Get(ref) + if t != nil { + return ref, t + } + } + for i := 1; i < len(ref); i++ { + t := env.Get(ref[:i]) + if t != nil { + return ref[:i], t + } + } + return nil, nil +} + // Get returns the type of x. 
func (env *TypeEnv) Get(x interface{}) types.Type { diff --git a/ast/parser.go b/ast/parser.go index ab3ddf5962..f0c5373e07 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -9,6 +9,9 @@ import ( "fmt" "io" "math/big" + "strings" + + "gopkg.in/yaml.v2" "github.com/open-policy-agent/opa/ast/internal/scanner" "github.com/open-policy-agent/opa/ast/internal/tokens" @@ -52,13 +55,20 @@ func (s *state) Text(offset, end int) []byte { // Parser is used to parse Rego statements. type Parser struct { - r io.Reader - s *state + r io.Reader + s *state + po ParserOptions +} + +// ParserOptions defines the options for parsing Rego statements. +type ParserOptions struct { + ProcessAnnotation bool } // NewParser creates and initializes a Parser. func NewParser() *Parser { - p := &Parser{s: &state{}} + p := &Parser{s: &state{}, + po: ParserOptions{}} return p } @@ -76,15 +86,82 @@ func (p *Parser) WithReader(r io.Reader) *Parser { return p } +// WithProcessAnnotation enables or disables the processing of +// annotations by the Parser +func (p *Parser) WithProcessAnnotation(processAnnotation bool) *Parser { + p.po.ProcessAnnotation = processAnnotation + return p +} + +const metadata = "METADATA" +const ruleScope = "rule" + +// Metadata is used to unmarshal the policy metadata information +type Metadata struct { + Scope string `yaml:"scope"` + Schemas []map[string]string `yaml:"schemas"` +} + +// getAnnotation returns annotations in the comment if any +func (p *Parser) getAnnotation(rule *Rule, endYamlLine int) (Annotations, error) { + var metadataYaml []byte + var startYamlLine, currentYamlLine, prevYamlLine int + for i := 0; i < len(p.s.comments); i++ { + comment := p.s.comments[i] + currentYamlLine = comment.Location.Row + if currentYamlLine > endYamlLine { //comment comes after the rule - not relevant + break + } + + if currentYamlLine != (prevYamlLine + 1) { //comment not part of the same block - not relevant + startYamlLine = 0 + metadataYaml = nil + } + + if 
strings.HasPrefix((strings.TrimSpace(string(comment.Text))), metadata) && comment.Location.Col == 1 { // found METADATA signalling start in a block comment + startYamlLine = currentYamlLine + 1 + metadataYaml = make([]byte, 0) + } + + if startYamlLine != 0 && currentYamlLine >= startYamlLine && currentYamlLine <= endYamlLine && comment.Location.Col == 1 { //build yaml content from block comment only + metadataYaml = append(metadataYaml, comment.Text...) + metadataYaml = append(metadataYaml, []byte("\n")...) + } + prevYamlLine = currentYamlLine + } + + if prevYamlLine == endYamlLine && len(metadataYaml) > 0 { + metadata := &Metadata{} + err := yaml.Unmarshal(metadataYaml, metadata) + if err != nil { + return nil, fmt.Errorf("unable to unmarshall the metadata yaml in comment") + } + + if metadata.Scope == ruleScope && metadata.Schemas != nil { + var sannot []SchemaAnnotation + for _, schemas := range metadata.Schemas { + for path, schema := range schemas { + sannot = append(sannot, SchemaAnnotation{Path: path, Schema: schema}) + } + } + return &SchemaAnnotations{SchemaAnnotation: sannot, + Scope: ruleScope, + Rule: rule}, nil + } + } + return nil, nil + +} + // Parse will read the Rego source and parse statements and // comments as they are found. Any errors encountered while // parsing will be accumulated and returned as a list of Errors. -func (p *Parser) Parse() ([]Statement, []*Comment, Errors) { +func (p *Parser) Parse() ([]Statement, []*Comment, []Annotations, Errors) { var err error p.s.s, err = scanner.New(p.r) if err != nil { - return nil, nil, Errors{ + return nil, nil, nil, Errors{ &Error{ Code: ParseErr, Message: err.Error(), @@ -97,6 +174,7 @@ func (p *Parser) Parse() ([]Statement, []*Comment, Errors) { p.scan() var stmts []Statement + var annotations []Annotations // Read from the scanner until the last token is reached or no statements // can be parsed. 
Attempt to parse package statements, import statements, @@ -131,6 +209,17 @@ func (p *Parser) Parse() ([]Statement, []*Comment, Errors) { if rules := p.parseRules(); rules != nil { for i := range rules { stmts = append(stmts, rules[i]) + // Append schema annotation to rule if there is one, and if processAnnotation option is on + if p.po.ProcessAnnotation { + ruleLoc := rules[i].Location.Row + annot, err := p.getAnnotation(rules[i], ruleLoc-1) + if err != nil { + p.error(rules[i].Location, err.Error()) + } + if annot != nil { + annotations = append(annotations, annot) + } + } } continue } else if len(p.s.errors) > 0 { @@ -148,7 +237,7 @@ func (p *Parser) Parse() ([]Statement, []*Comment, Errors) { break } - return stmts, p.s.comments, p.s.errors + return stmts, p.s.comments, annotations, p.s.errors } func (p *Parser) parsePackage() *Package { diff --git a/ast/parser_ext.go b/ast/parser_ext.go index 644169a487..a92f8ff19e 100644 --- a/ast/parser_ext.go +++ b/ast/parser_ext.go @@ -418,11 +418,18 @@ func ParseImports(input string) ([]*Import, error) { // For details on Module objects and their fields, see policy.go. // Empty input will return nil, nil. func ParseModule(filename, input string) (*Module, error) { - stmts, comments, err := ParseStatements(filename, input) + return ParseModuleWithOpts(filename, input, ParserOptions{}) +} + +// ParseModuleWithOpts returns a parsed Module object, and has an additional input ParserOptions +// For details on Module objects and their fields, see policy.go. +// Empty input will return nil, nil. +func ParseModuleWithOpts(filename, input string, popts ParserOptions) (*Module, error) { + stmts, comments, annotations, err := ParseStatementsWithOpts(filename, input, popts) if err != nil { return nil, err } - return parseModule(filename, stmts, comments) + return parseModule(filename, stmts, comments, annotations) } // ParseBody returns exactly one body. 
@@ -567,7 +574,7 @@ func (a commentKey) Compare(other commentKey) int { // This is the default return value from the parser. func ParseStatements(filename, input string) ([]Statement, []*Comment, error) { - stmts, comment, errs := NewParser().WithFilename(filename).WithReader(bytes.NewBufferString(input)).Parse() + stmts, comment, _, errs := NewParser().WithFilename(filename).WithReader(bytes.NewBufferString(input)).Parse() if len(errs) > 0 { return nil, nil, errs @@ -576,7 +583,25 @@ func ParseStatements(filename, input string) ([]Statement, []*Comment, error) { return stmts, comment, nil } -func parseModule(filename string, stmts []Statement, comments []*Comment) (*Module, error) { +// ParseStatementsWithOpts returns a slice of parsed statements, and has an additional input ParserOptions +// This is the default return value from the parser. +func ParseStatementsWithOpts(filename, input string, popts ParserOptions) ([]Statement, []*Comment, []Annotations, error) { + + parser := NewParser().WithFilename(filename).WithReader(bytes.NewBufferString(input)) + + if popts.ProcessAnnotation { + parser.WithProcessAnnotation(popts.ProcessAnnotation) + } + stmts, comment, annotations, errs := parser.Parse() + + if len(errs) > 0 { + return nil, nil, nil, errs + } + + return stmts, comment, annotations, nil +} + +func parseModule(filename string, stmts []Statement, comments []*Comment, annotation []Annotations) (*Module, error) { if len(stmts) == 0 { return nil, NewError(ParseErr, &Location{File: filename}, "empty module") @@ -591,7 +616,8 @@ func parseModule(filename string, stmts []Statement, comments []*Comment) (*Modu } mod := &Module{ - Package: _package, + Package: _package, + Annotation: annotation, } // The comments slice only holds comments that were not their own statements. 
diff --git a/ast/parser_test.go b/ast/parser_test.go index aa8e515f6d..4304653585 100644 --- a/ast/parser_test.go +++ b/ast/parser_test.go @@ -2662,6 +2662,348 @@ else = { `), curElse.Head.Value.Location) } +func TestGetAnnotation(t *testing.T) { + + tests := []struct { + note string + module string + expNumComments int + expAnnotations []Annotations + expError string + }{ + { + note: "Single valid annotation", + module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing a single schema +# METADATA +# scope: rule +# schemas: +# - data.servers: schema.servers +public_servers[server] { + server = servers[i]; server.ports[j] = ports[k].id + ports[k].networks[l] = networks[m].id; + networks[m].public = true +}`, + expNumComments: 5, + expAnnotations: append(make([]Annotations, 0), + &SchemaAnnotations{ + SchemaAnnotation: append(make([]SchemaAnnotation, 0), SchemaAnnotation{Path: "data.servers", Schema: "schemas.servers"}), + Scope: ruleScope, + }), + }, + { + note: "Multiple annotations on multiple lines", + module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing three schemas +# METADATA +# scope: rule +# schemas: +# - data.servers: schema.servers +# - data.networks: schema.networks +# - data.ports: schema.ports +public_servers[server] { + server = servers[i]; server.ports[j] = ports[k].id + ports[k].networks[l] = networks[m].id; + networks[m].public = true +}`, + expNumComments: 7, + expAnnotations: append(make([]Annotations, 0), + &SchemaAnnotations{ + SchemaAnnotation: append(make([]SchemaAnnotation, 0), SchemaAnnotation{Path: "data.servers", Schema: "schemas.servers"}, SchemaAnnotation{Path: "data.networks", Schema: "schemas.networks"}, SchemaAnnotation{Path: "data.ports", Schema: "schemas.ports"}), + Scope: ruleScope, + }), + }, + { + note: "Comment in between metadata and rule (valid)", + 
module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing three schemas +# METADATA +# scope: rule +# schemas: +# - data.servers: schema.servers +# - data.networks: schema.networks +# - data.ports: schema.ports +#This is a comment after the metadata yaml +public_servers[server] { + server = servers[i]; server.ports[j] = ports[k].id + ports[k].networks[l] = networks[m].id; + networks[m].public = true +}`, + expNumComments: 8, + expAnnotations: append(make([]Annotations, 0), + &SchemaAnnotations{ + SchemaAnnotation: append(make([]SchemaAnnotation, 0), SchemaAnnotation{Path: "data.servers", Schema: "schemas.servers"}, SchemaAnnotation{Path: "data.networks", Schema: "schemas.networks"}, SchemaAnnotation{Path: "data.ports", Schema: "schemas.ports"}), + Scope: ruleScope, + }), + }, + { + note: "Empty comment line in between metadata and rule (valid)", + module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing three schemas +# METADATA +# scope: rule +# schemas: +# - data.servers: schema.servers +# - data.networks: schema.networks +# - data.ports: schema.ports +# +public_servers[server] { + server = servers[i]; server.ports[j] = ports[k].id + ports[k].networks[l] = networks[m].id; + networks[m].public = true +}`, + expNumComments: 8, + expAnnotations: append(make([]Annotations, 0), + &SchemaAnnotations{ + SchemaAnnotation: append(make([]SchemaAnnotation, 0), SchemaAnnotation{Path: "data.servers", Schema: "schemas.servers"}, SchemaAnnotation{Path: "data.networks", Schema: "schemas.networks"}, SchemaAnnotation{Path: "data.ports", Schema: "schemas.ports"}), + Scope: ruleScope, + }), + }, + { + note: "Ill-structured (invalid) metadata start", + module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing three schemas +# 
METADATA +# scope: rule +# schemas: +# - data.servers: schema.servers +# - data.networks: schema.networks +# - data.ports: schema.ports +# METADATA +public_servers[server] { + server = servers[i]; server.ports[j] = ports[k].id + ports[k].networks[l] = networks[m].id; + networks[m].public = true +}`, + expNumComments: 8, + expAnnotations: nil, + }, + { + note: "Ill-structured (valid) annotation", + module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing three schemas +# METADATA +# scope: rule +# schemas: +# - data/servers: schemas/servers +public_servers[server] { + server = servers[i]; server.ports[j] = ports[k].id + ports[k].networks[l] = networks[m].id; + networks[m].public = true +}`, + expNumComments: 5, + expAnnotations: append(make([]Annotations, 0), + &SchemaAnnotations{ + SchemaAnnotation: append(make([]SchemaAnnotation, 0), SchemaAnnotation{Path: "data/servers", Schema: "schemas/servers"}), + Scope: ruleScope, + }), + }, + { + note: "Ill-structured (invalid) annotation", + module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing three schemas +# METADATA +# scope: rule +# schemas: +# - data.servers= schema +public_servers[server] { + server = servers[i]; server.ports[j] = ports[k].id + ports[k].networks[l] = networks[m].id; + networks[m].public = true +}`, + expNumComments: 5, + expAnnotations: nil, + expError: "unable to unmarshall the metadata yaml in comment", + }, + { + note: "Indentation error in yaml", + module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing three schemas +# METADATA +# scope: rule +# schemas: +# - data.servers: schema.servers +# - data.networks: schema.networks +# - data.ports: schema.ports +public_servers[server] { + server = servers[i]; server.ports[j] = ports[k].id + 
ports[k].networks[l] = networks[m].id; + networks[m].public = true +}`, + expNumComments: 7, + expAnnotations: nil, + expError: "unable to unmarshall the metadata yaml in comment", + }, + { + note: "Multiple rules with and without metadata", + module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing three schemas +# METADATA +# scope: rule +# schemas: +# - data.servers: schema.servers +# - data.networks: schema.networks +# - data.ports: schema.ports +public_servers[server] { + server = servers[i]; server.ports[j] = ports[k].id + ports[k].networks[l] = networks[m].id; + networks[m].public = true +} + +public_servers_1[server] { + server = servers[i]; server.ports[j] = ports[k].id + ports[k].networks[l] = networks[m].id; + networks[m].public = true + server.typo # won't catch this type error since rule has no schema metadata +}`, + expNumComments: 8, + expAnnotations: append(make([]Annotations, 0), + &SchemaAnnotations{ + SchemaAnnotation: append(make([]SchemaAnnotation, 0), SchemaAnnotation{Path: "data.servers", Schema: "schemas.servers"}, SchemaAnnotation{Path: "data.networks", Schema: "schemas.networks"}, SchemaAnnotation{Path: "data.ports", Schema: "schemas.ports"}), + Scope: ruleScope, + }), + }, + { + note: "Multiple rules with metadata", + module: ` +package opa.examples + +import data.servers +import data.networks +import data.ports + +#Schema annotation for this rule referencing three schemas +# METADATA +# scope: rule +# schemas: +# - data.servers: schema.servers +public_servers[server] { + server = servers[i] +} + +#Schema annotation for this rule referencing three schemas +# METADATA +# scope: rule +# schemas: +# - data.networks: schema.networks +# - data.ports: schema.ports +public_servers_1[server] { + ports[k].networks[l] = networks[m].id; + networks[m].public = true +}`, + expNumComments: 11, + expAnnotations: append(make([]Annotations, 0), + &SchemaAnnotations{ + 
SchemaAnnotation: append(make([]SchemaAnnotation, 0), SchemaAnnotation{Path: "data.servers", Schema: "schemas.servers"}), + Scope: ruleScope, + Rule: MustParseRule(`public_servers[server] { server = servers[i] }`), + }, &SchemaAnnotations{ + SchemaAnnotation: append(make([]SchemaAnnotation, 0), SchemaAnnotation{Path: "data.networks", Schema: "schemas.networks"}, SchemaAnnotation{Path: "data.ports", Schema: "schemas.ports"}), + Scope: ruleScope, + Rule: MustParseRule(`public_servers_1[server] { ports[k].networks[l] = networks[m].id; networks[m].public = true }`), + }), + }, + } + + for _, tc := range tests { + t.Run(tc.note, func(t *testing.T) { + mod, err := ParseModuleWithOpts("test.rego", tc.module, ParserOptions{ + ProcessAnnotation: true, + }) + if err != nil { + if tc.expError == "" || !strings.Contains(err.Error(), tc.expError) { + t.Fatalf("Unexpected parse error when getting annotations: %v", err) + } + return + } else if tc.expError != "" { + t.Fatalf("Expected err: %v but no error from parse module", tc.expError) + } + + if len(mod.Comments) != tc.expNumComments { + t.Errorf("Expected %v comments but got %v", tc.expNumComments, len(mod.Comments)) + } + + annotations := mod.Annotation + if len(annotations) != len(tc.expAnnotations) { + t.Errorf("Expected %v annotations but got %v", len(tc.expAnnotations), len(annotations)) + } + + for _, annot := range annotations { + schemaAnnots, ok := annot.(*SchemaAnnotations) + if !ok { + t.Fatalf("Expected err: %v but no error from parse module", tc.expError) + } + for _, tcannot := range tc.expAnnotations { + tcschemaAnnots, ok := tcannot.(*SchemaAnnotations) + if !ok { + t.Fatalf("Expected err: %v but no error from parse module", tc.expError) + } + if schemaAnnots.Scope == ruleScope && tcschemaAnnots.Scope == ruleScope && tcschemaAnnots.Rule != nil && schemaAnnots.Rule.Head.Name == tcschemaAnnots.Rule.Head.Name { + if len(schemaAnnots.SchemaAnnotation) != len(tcschemaAnnots.SchemaAnnotation) { + t.Errorf("Expected 
%v annotations but got %v", len(schemaAnnots.SchemaAnnotation), len(tcschemaAnnots.SchemaAnnotation)) + } + } + } + + } + }) + } +} + func assertLocationText(t *testing.T, expected string, actual *Location) { t.Helper() if actual == nil || actual.Text == nil { diff --git a/ast/policy.go b/ast/policy.go index 9886dfab93..b317ce9ad6 100644 --- a/ast/policy.go +++ b/ast/policy.go @@ -118,10 +118,11 @@ type ( // within a namespace (defined by the package) and optional // dependencies on external documents (defined by imports). Module struct { - Package *Package `json:"package"` - Imports []*Import `json:"imports,omitempty"` - Rules []*Rule `json:"rules,omitempty"` - Comments []*Comment `json:"comments,omitempty"` + Package *Package `json:"package"` + Imports []*Import `json:"imports,omitempty"` + Rules []*Rule `json:"rules,omitempty"` + Comments []*Comment `json:"comments,omitempty"` + Annotation []Annotations `json:"annotation,omitempty"` } // Comment contains the raw text from the comment in the definition. @@ -130,6 +131,29 @@ type ( Location *Location } + // Annotations contains information extracted from metadata in comments + Annotations interface { + annotationMaker() + + // NOTE(tsandall): these are temporary interfaces that are required to support copy operations. + // When we get rid of the rule pointers, these may not be needed. + copy(Node) Annotations + node() Node + } + + // SchemaAnnotations contains information about schemas + SchemaAnnotations struct { + SchemaAnnotation []SchemaAnnotation `json:"schemaannotation"` + Scope string `json:"scope"` + Rule *Rule `json:"-"` + } + + // SchemaAnnotation contains information about a schema + SchemaAnnotation struct { + Path string `json:"path"` + Schema string `json:"schema"` + } + // Package represents the namespace of the documents produced // by rules inside the module. 
Package struct { @@ -202,6 +226,18 @@ type ( } ) +func (s *SchemaAnnotations) copy(node Node) Annotations { + cpy := *s + cpy.Rule = node.(*Rule) + return &cpy +} + +func (s *SchemaAnnotations) node() Node { + return s.Rule +} + +func (*SchemaAnnotations) annotationMaker() {} + // Compare returns an integer indicating whether mod is less than, equal to, // or greater than other. func (mod *Module) Compare(other *Module) int { @@ -226,9 +262,28 @@ func (mod *Module) Compare(other *Module) int { func (mod *Module) Copy() *Module { cpy := *mod cpy.Rules = make([]*Rule, len(mod.Rules)) + + // NOTE(tsandall): only construct the map if annotations are present. This is a temporary + // workaround to deal with the lack of a stable index mapping annotations to rules. + var rules map[Node]Node + if len(mod.Annotation) > 0 { + rules = make(map[Node]Node, len(mod.Rules)) + } + for i := range mod.Rules { cpy.Rules[i] = mod.Rules[i].Copy() + cpy.Rules[i].Module = &cpy + + if rules != nil { + rules[mod.Rules[i]] = cpy.Rules[i] + } } + + cpy.Annotation = make([]Annotations, len(mod.Annotation)) + for i := range mod.Annotation { + cpy.Annotation[i] = mod.Annotation[i].copy(rules[mod.Annotation[i].node()]) + } + cpy.Imports = make([]*Import, len(mod.Imports)) for i := range mod.Imports { cpy.Imports[i] = mod.Imports[i].Copy() diff --git a/bundle/bundle.go b/bundle/bundle.go index faa29949e4..dca6d25919 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -296,6 +296,7 @@ type Reader struct { baseDir string verificationConfig *VerificationConfig skipVerify bool + processAnnotations bool files map[string]FileInfo // files in the bundle signature payload sizeLimitBytes int64 } @@ -349,6 +350,12 @@ func (r *Reader) WithSkipBundleVerification(skipVerify bool) *Reader { return r } +// WithProcessAnnotations enables annotation processing during .rego file parsing. 
+func (r *Reader) WithProcessAnnotations(yes bool) *Reader { + r.processAnnotations = yes + return r +} + // WithSizeLimitBytes sets the size limit to apply to files in the bundle. If files are larger // than this, an error will be returned by the reader. func (r *Reader) WithSizeLimitBytes(n int64) *Reader { @@ -410,7 +417,7 @@ func (r *Reader) Read() (Bundle, error) { if strings.HasSuffix(path, RegoExt) { fullPath := r.fullPath(path) r.metrics.Timer(metrics.RegoModuleParse).Start() - module, err := ast.ParseModule(fullPath, buf.String()) + module, err := ast.ParseModuleWithOpts(fullPath, buf.String(), ast.ParserOptions{ProcessAnnotation: r.processAnnotations}) r.metrics.Timer(metrics.RegoModuleParse).Stop() if err != nil { return bundle, err diff --git a/cmd/eval.go b/cmd/eval.go index 9299b390ab..e5cd69722a 100644 --- a/cmd/eval.go +++ b/cmd/eval.go @@ -11,6 +11,7 @@ import ( "io" "io/ioutil" "os" + "path/filepath" "strconv" "strings" @@ -207,9 +208,11 @@ Set the output format with the --format flag. Schema ------ -The -s/--schema flag provides a single JSON Schema used to validate references to the input document. +The -s/--schema flag provides one or more JSON Schemas used to validate references to the input or data documents. +Loads a single JSON file, applying it to the input document; or all the schema files under the specified directory. 
- $ opa eval --data policy.rego --input input.json --schema input-schema.json + $ opa eval --data policy.rego --input input.json --schema schema.json + $ opa eval --data policy.rego --input input.json --schema schemas/ `, PreRunE: func(cmd *cobra.Command, args []string) error { @@ -265,6 +268,8 @@ The -s/--schema flag provides a single JSON Schema used to validate references t RootCommand.AddCommand(evalCommand) } +const schemaVar = "schema" + func eval(args []string, params evalCommandParams, w io.Writer) (bool, error) { ectx, err := setupEval(args, params) @@ -430,20 +435,15 @@ func setupEval(args []string, params evalCommandParams) (*evalContext, error) { regoArgs = append(regoArgs, rego.ParsedInput(inputValue)) } - schemaBytes, err := readSchemaBytes(params) + /* + -s {file} (one input schema file) + -s {directory} (one schema directory with input and data schema files) + */ + schemaSet, err := readSchemaBytes(params) if err != nil { return nil, err } - - if schemaBytes != nil { - var schema interface{} - err := util.Unmarshal(schemaBytes, &schema) - if err != nil { - return nil, fmt.Errorf("unable to parse schema: %s", err.Error()) - } - schemaSet := &ast.SchemaSet{ByPath: map[string]interface{}{"input": schema}} - regoArgs = append(regoArgs, rego.Schemas(schemaSet)) - } + regoArgs = append(regoArgs, rego.Schemas(schemaSet)) var tracer *topdown.BufferTracer @@ -542,14 +542,86 @@ func readInputBytes(params evalCommandParams) ([]byte, error) { return nil, nil } -func readSchemaBytes(params evalCommandParams) ([]byte, error) { +func readSchemaBytes(params evalCommandParams) (*ast.SchemaSet, error) { if params.schemaPath != "" { + ss := ast.NewSchemaSet() + var schema interface{} path, err := fileurl.Clean(params.schemaPath) if err != nil { return nil, err } - return ioutil.ReadFile(path) + + if info, err := os.Stat(path); err == nil && !info.IsDir() { //contains a single input schema file + schemaBytes, err := ioutil.ReadFile(path) + if err != nil { + return nil, 
err + } + + err = util.Unmarshal(schemaBytes, &schema) + if err != nil { + return nil, fmt.Errorf("unable to unmarshal schema: %s", err.Error()) + } + + ss.ByPath.Put(ast.InputRootRef, schema) + return ss, nil + } else if err != nil { + return nil, err + } + + rootDir := path + + err = filepath.Walk(path, + func(path string, info os.FileInfo, err error) error { + if err != nil { + return fmt.Errorf("error in walking file path: %w", err) + } + + if info.IsDir() { // ignoring directories + return nil + } + + // proceed knowing it's a file + schemaBytes, err := ioutil.ReadFile(path) + if err != nil { + return err + } + err = util.Unmarshal(schemaBytes, &schema) + if err != nil { + return fmt.Errorf("unable to unmarshal schema: %s", err) + } + + relPath, err := filepath.Rel(rootDir, path) + if err != nil { + return err + } + + front := filepath.Dir(relPath) + last := strings.TrimSuffix(filepath.Base(relPath), filepath.Ext(path)) + + var parts []string + + if front != "." { + parts = append(strings.Split(filepath.ToSlash(front), "/"), last) + } else { + parts = []string{last} + } + + key := make(ast.Ref, 1+len(parts)) + key[0] = ast.VarTerm("schema") + for i := range parts { + key[i+1] = ast.StringTerm(parts[i]) + } + + ss.ByPath.Put(key, schema) + return nil + }) + if err != nil { + return nil, err + } + + return ss, nil } + return nil, nil } diff --git a/cmd/eval_test.go b/cmd/eval_test.go index 94a8da26e2..b97ea0627f 100755 --- a/cmd/eval_test.go +++ b/cmd/eval_test.go @@ -273,7 +273,47 @@ func testEvalWithInvalidSchemaFile(t *testing.T, input string, query string, sch return err } -func TestEvalWithJSONSchemaFile(t *testing.T) { +func testReadParamWithSchemaDir(t *testing.T, input string, query string, inputSchema string) error { + files := map[string]string{ + "input.json": input, + "schemas/input.json": inputSchema, + "schemas/kubernetes/data-schema.json": inputSchema, + } + + var err error + test.WithTempFS(files, func(path string) { + + params := 
newEvalCommandParams() + params.inputPath = filepath.Join(path, "input.json") + params.schemaPath = filepath.Join(path, "schemas") + + schemaSet, err := readSchemaBytes(params) + if err != nil { + err = fmt.Errorf("Unexpected error or undefined from evaluation: %v", err) + return + } + + if schemaSet == nil { + err = fmt.Errorf("Schema set is empty") + return + } + + if _, ok := schemaSet.ByPath.Get(ast.MustParseRef("schema.input")); !ok { + err = fmt.Errorf("Expected schema for input in schemaSet but got none") + return + } + + if _, ok := schemaSet.ByPath.Get(ast.MustParseRef(`schema.kubernetes["data-schema"]`)); !ok { + err = fmt.Errorf("Expected schemas for data in schemaSet but got none") + return + } + + }) + + return err +} + +func TestEvalWithJSONSchema(t *testing.T) { input := `{ "foo": "a", @@ -358,6 +398,11 @@ func TestEvalWithJSONSchemaFile(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %s", err) } + + err = testReadParamWithSchemaDir(t, input, query, schema) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } } func TestEvalWithInvalidSchemaFile(t *testing.T) { diff --git a/cmd/flags.go b/cmd/flags.go index a83fed7666..65484cbc78 100644 --- a/cmd/flags.go +++ b/cmd/flags.go @@ -131,7 +131,7 @@ func addUnknownsFlag(fs *pflag.FlagSet, unknowns *[]string, value []string) { } func addSchemaFlag(fs *pflag.FlagSet, schemaPath *string) { - fs.StringVarP(schemaPath, "schema", "s", "", "set schema file path") + fs.StringVarP(schemaPath, "schema", "s", "", "set schema file path or directory path") } func addTargetFlag(fs *pflag.FlagSet, target *util.EnumFlag) { diff --git a/docs/content/schemas.md b/docs/content/schemas.md index 981f2006a3..e10f796edf 100644 --- a/docs/content/schemas.md +++ b/docs/content/schemas.md @@ -6,24 +6,53 @@ weight: 2 ## Using schemas to enhance the Rego type checker -You can provide an input schema to `opa eval` to improve static type checking and get more precise error reports as you develop Rego code. 
-The `-s` flag can be used to upload a single schema for the input document in JSON Schema format.
+You can provide one or more input schema files and/or data schema files to `opa eval` to improve static type checking and get more precise error reports as you develop Rego code.
+The `-s` flag can be used to upload schemas for input and data documents in JSON Schema format. You can either load a single JSON schema file for the input document or a directory of schema files.
+
+```
+-s, --schema string set schema file path or directory path
+```
+
+### Passing a single file with -s
+
+When a single file is passed, it is a schema file associated with the input document globally. This means that for all rules in all packages, the `input` has a type derived from that schema. There is no constraint on the name of the file, it could be anything.
+
+Example:
+```
+opa eval data.envoy.authz.allow -i opa-schema-examples/envoy/input.json -d opa-schema-examples/envoy/policy.rego -s opa-schema-examples/envoy/schemas/my-schema.json
 ```
--s, --schema string set schema file path
+
+
+### Passing a directory with -s
+
+When a directory path is passed, annotations will be used in the code to indicate what expressions map to what schemas (see below).
+Both input schema files and data schema files can be provided in the same directory, with different names. The directory of schemas may have any sub-directories. Notice that when a directory is passed the input document does not have a schema associated with it globally. This must also
+be indicated via an annotation.
+ + +Example: +``` +opa eval data.kubernetes.admission -i opa-schema-examples/kubernetes/input.json -d opa-schema-examples/kubernetes/policy.rego -s opa-schema-examples/kubernetes/schemas + ``` +Schemas can also be provided for policy and data files loaded via `opa eval --bundle` + + Example: ``` -opa eval data.envoy.authz.allow -i example/envoy/input.json -d example/envoy/policy.rego -s example/envoy/input-schema.json +opa eval data.kubernetes.admission -i opa-schema-examples/kubernetes/input.json -b opa-schema-examples/bundle.tar.gz -s opa-schema-examples/kubernetes/schemas + ``` -Samples provided at: https://github.com/aavarghese/opa-schema-examples/tree/main/envoy + +Samples provided at: https://github.com/aavarghese/opa-schema-examples/ -## Usage Scenario +## Usage scenario with a single schema file -Consider the following Rego code, which assumes as input a Kubernetes admission review. For resources that are `Pod`s, it checks that the image name +Consider the following Rego code, which assumes as input a Kubernetes admission review. For resources that are Pods, it checks that the image name starts with a specific prefix. `pod.rego` @@ -43,7 +72,7 @@ Notice that this code has a typo in it: `input.request.kind.kinds` is undefined Consider the following input document: -`admission-review.json` +`input.json` ``` { "kind": "AdmissionReview", @@ -73,15 +102,15 @@ Consider the following input document: } ``` - Clearly there are 2 image names that are in violation of the policy. However, when we evalute the erroneous Rego code against this input we obtain: + Clearly there are 2 image names that are in violation of the policy. 
However, when we evaluate the erroneous Rego code against this input we obtain: ``` - % opa eval --format pretty -i admission-review.json -d pod.rego - $ [] + % opa eval --format pretty -i opa-schema-examples/kubernetes/input.json -d opa-schema-examples/kubernetes/policy.rego + [] ``` The empty value returned is indistinguishable from a situation where the input did not violate the policy. This error is therefore causing the policy not to catch violating inputs appropriately. - If we fix the Rego code and change`input.request.kind.kinds` to `input.request.kind.kind`, then we obtain the expected result: + If we fix the Rego code and change `input.request.kind.kinds` to `input.request.kind.kind`, then we obtain the expected result: ``` [ "image 'nginx' comes from untrusted registry", @@ -90,16 +119,16 @@ Consider the following input document: ``` With this feature, it is possible to pass a schema to `opa eval`, written in JSON Schema. Consider the admission review schema provided at: - https://github.com/aavarghese/opa-schema-examples/blob/main/kubernetes/admission-schema.json + https://github.com/aavarghese/opa-schema-examples/blob/main/kubernetes/schemas/input.json We can pass this schema to the evaluator as follows: ``` - % opa eval --format pretty -i admission-review.json -d pod.rego -s admission-schema.json + % opa eval --format pretty -i opa-schema-examples/kubernetes/input.json -d opa-schema-examples/kubernetes/policy.rego -s opa-schema-examples/kubernetes/schemas/input.json ``` With the erroneous Rego code, we now obtain the following type error: ``` - 1 error occurred: ../../aavarghese/opa-schema-examples/kubernetes/pod.rego:5: rego_type_error: undefined ref: input.request.kind.kinds + 1 error occurred: ../../aavarghese/opa-schema-examples/kubernetes/policy.rego:5: rego_type_error: undefined ref: input.request.kind.kinds input.request.kind.kinds ^ have: "kinds" @@ -108,19 +137,201 @@ Consider the following input document: This indicates the error to the Rego 
developer right away, without having the need to observe the results of runs on actual data, thereby improving productivity.
+## Schema annotations
-## References
+When passing a directory of schemas to `opa eval`, schema annotations come in handy to associate a Rego expression with a corresponding schema within a given scope:
-For more examples, please see https://github.com/aavarghese/opa-schema-examples
+```
+# METADATA
+# scope: rule
+# schemas:
+# - <path>: <schema-path>
+# ...
+# - <path>: <schema-path>
+<rule> {
+  ...
+}
+```
-This contains samples for Envoy, Kubernetes, and Terraform including corresponding JSON Schemas.
+The annotation must be specified as a yaml within a comment block that **must** start with `# METADATA`. Next, it contains a `scope` field to indicate the scope of application of the annotation. The only scope currently supported is `rule`, meaning an annotation that applies within the scope of a rule. Also, every line in the comment block containing the annotation **must** start at Column 1 in the module/file, or otherwise, they will be ignored. Notice that the comment block containing the annotation **must** immediately precede the block defining the rule without any empty lines in between.
+
+The `schemas` field specifies an array associating schemas to expressions. An expression is of the form `<document>.field1. ... .fieldN`.
+Note that we currently don't support aliasing, so the expression must start with either `input` or `data`.
+
+The type checker derives a Rego Object type for the schema and an appropriate entry is added to the type environment before type checking the rule. This entry is removed upon exit from the rule.
+
+Example:
+
+Consider the following Rego code which checks if an operation is allowed by a user, given an acl data document:
+
+```
+package policy
+
+import data.acl
+
+default allow = false
+
+# METADATA
+# scope: rule
+# schemas:
+# - input: schema["input"]
+# - data.acl: schema["acl-schema"]
+allow {
+    access = data.acl["alice"]
+    access[_] == input.operation
+}
+
+allow {
+    access = data.acl["bob"]
+    access[_] == input.operation
+}
+```
+
+Consider a directory named `mySchemasDir` with the following structure, provided via `opa eval --schema opa-schema-examples/mySchemasDir`
+
+```$ tree mySchemasDir/
+mySchemasDir/
+├── input.json
+└── acl-schema.json
+```
+
+For actual code samples, see: https://github.com/aavarghese/opa-schema-examples/acl
+
+In the first `allow` rule above, the input document has the schema `input.json`, and `data.acl` has the schema `acl-schema.json`. Note that we use the relative path inside the `mySchemasDir` directory to identify a schema, omit the `.json` suffix, and use the global variable `schema` to stand for the top-level of the directory.
+Schemas in annotations are proper Rego references. So `schema.input` is also valid, but `schema.acl-schema` is not.
+
+If we had the expression `data.acl.foo` in this rule, it would result in a type error because it is not allowed according to `acl-schema.json`.
+
+On the other hand, this annotation does not constrain other paths under `data`. What it says is that we know the type of `data.acl` statically, but not that of other paths. So for example, `data.foo` is not a type error and gets assigned the type `Any`.
+
+Note that the second `allow` rule doesn't have a METADATA comment block attached to it, and hence will not be type checked with any schemas.
+
+On a different note, schema annotations can also be added to policy files part of a bundle package loaded via `opa eval --bundle` along with the `--schema` parameter for type checking a set of `*.rego` policy files.
+
+
+### Schema overriding
+
+JSON Schemas are often incomplete specifications of the format of data. For example, a Kubernetes Admission Review resource has a field `object` which can contain any other Kubernetes resource. A schema for Admission Review has a generic type `object` for that field that has no further specification. To allow more precise type checking in such cases, we support overriding existing schemas.
+
+Consider the following example:
+```
+package kubernetes.admission
+
+# METADATA
+# scope: rule
+# schemas:
+# - input: schema["input"]
+# - input.request.object: schema.kubernetes["pod"]
+deny[msg] {
+    input.request.kind.kind == "Pod"
+    image := input.request.object.spec.containers[_].image
+    not startswith(image, "hooli.com/")
+    msg := sprintf("image '%v' comes from untrusted registry", [image])
+}
+```
+
+In this example, the `input` is associated with an Admission Review schema, and furthermore `input.request.object` is set to have the schema of a Kubernetes Pod. In effect, the second schema annotation overrides the first one. Overriding is a schema transformation feature and combines existing schemas. In this case, we are combining the Admission Review schema with that of a Pod.
+
+Notice that the order of schema annotations matters for overriding to work correctly.
+
+Given a schema annotation, if a prefix of the path already has a type in the environment, then the annotation has the effect of merging and overriding the existing type with the type derived from the schema. In the example above, the prefix `input` already has a type in the type environment, so the second annotation overrides this existing type. Overriding affects the type of the longest prefix that already has a type. If no such prefix exists, the new path and type are added to the type environment for the scope of the rule.
+ +In general, consider the existing Rego type: + +``` +object{a: object{b: object{c: C, d: D, e: E}}} +``` +If we override this type with the following type (derived from a schema annotation of the form `a.b.e: schema-for-E1`): + +``` +object{a: object{b: object{e: E1}}} +``` + +It results in the following type: + +``` +object{a: object{b: object{c: C, d: D, e: E1}}} +``` + +Notice that `b` still has its fields `c` and `d`, so overriding has a merging effect as well. Moreover, the type of expression `a.b.e` is now `E1` instead of `E`. + + + + + +We can also use overriding to add new paths to an existing type, so if we override the initial type with the following: + +``` +object{a: object{b: object{f: F}}} +``` + +we obtain the following type: + +``` +object{a: object{b: object{c: C, d: D, e: E, f: F}}} +``` + + + +We use schemas to enhance the type checking capability of OPA, and not to validate the input and data documents against desired schemas. This burden is still on the user and care must be taken when using overriding to ensure that the input and data provided are sensible and validated against the transformed schemas. + + +### Multiple input schemas + +It is sometimes useful to have different input schemas for different rules in the same package. 
This can be achieved as illustrated by the following example: + +``` +package policy + +import data.acl + +default allow = false + +# METADATA +# scope: rule +# schemas: +# - input: schema["input"] +# - data.acl: schema["acl-schema"] +allow { + access = data.acl[input.user] + access[_] == input.operation +} + +# METADATA for whocan rule +# scope: rule +# schemas: +# - input: schema["whocan-input-schema"] +# - data.acl: schema["acl-schema"] +whocan[user] { + access = acl[user] + access[_] == input.operation +} +``` + +The directory that is passed to `opa eval` is the following: +```$ tree mySchemasDir/ +mySchemasDir/ +├── input.json +└── acl-schema.json +└── whocan-input-schema.json +``` + +In this example, we associate the schema `input.json` with the input document in the rule `allow`, and the schema `whocan-input-schema.json` +with the input document for the rule `whocan`. + +### Translating schemas to Rego types and dynamicity + +Rego has a gradual type system meaning that types can be partially known statically. For example, an object could have certain fields whose types are known and others that are unknown statically. OPA type checks what it knows statically and leaves the unknown parts to be type checked at runtime. An OPA object type has two parts: the static part with the type information known statically, and a dynamic part, which can be nil (meaning everything is known statically) or non-nil and indicating what is unknown. + +When we derive a type from a schema, we try to match what is known and unknown in the schema. For example, an `object` that has no specified fields becomes the Rego type `Object{Any: Any}`. However, currently `additionalProperties` and `additionalItems` are ignored. When a schema is fully specified, we derive a type with its dynamic part set to nil, meaning that we take a strict interpretation in order to get the most out of static type checking. This is the case even if `additionalProperties` is set to `true` in the schema. 
In the future, we will take this feature into account when deriving Rego types.
+
+When overriding existing types, the dynamicity of the overridden prefix is preserved.
-For a reference on JSON Schema please see: http://json-schema.org/understanding-json-schema/reference/index.html
## Limitations
-Currently this feature admits schemas written in JSON Schema but does not support every feature available in this format. This is part of future work.
-In particular the following features are not yet suported:
+Currently this feature admits schemas written in JSON Schema but does not support every feature available in this format.
+In particular the following features are not yet supported:
* additional properties for objects
* pattern properties for objects
@@ -129,3 +340,24 @@ In particular the following features are not yet suported:
* allOf, anyOf, oneOf, not
* enum
* if/then/else
+
+A note of caution: overriding is a powerful capability that must be used carefully. For example, the user is allowed to write:
+
+```
+# METADATA
+# scope: rule
+# schemas:
+# - data: schema["some-schema"]
+```
+
+In this case, we are overriding the root of all documents to have some schema. Since all Rego code lives under `data` as virtual documents, this in practice renders all of them inaccessible (resulting in type errors). Similarly, assigning a schema to a package name is not a good idea and can cause problems. Care must also be taken when defining overrides so that the transformation of schemas is sensible and data can be validated against the transformed schema.
+
+## References
+
+For more examples, please see https://github.com/aavarghese/opa-schema-examples
+
+This contains samples for Envoy, Kubernetes, and Terraform including corresponding JSON Schemas.
+ +For a reference on JSON Schema please see: http://json-schema.org/understanding-json-schema/reference/index.html + +For a tool that generates JSON Schema from JSON samples, please see: https://jsonschema.net/home \ No newline at end of file diff --git a/go.mod b/go.mod index 7a5782c477..439a3813de 100644 --- a/go.mod +++ b/go.mod @@ -33,4 +33,6 @@ require ( golang.org/x/net v0.0.0-20201021035429-f5854403a974 golang.org/x/tools v0.1.0 golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba + golang.org/x/text v0.3.3 // indirect + gopkg.in/yaml.v2 v2.3.0 ) diff --git a/go.sum b/go.sum index b51857ccbc..239dac5e2e 100644 --- a/go.sum +++ b/go.sum @@ -336,6 +336,8 @@ golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvx golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b h1:Wh+f8QHJXR411sJR8/vRBTZ7YapZaRvUcLFFJhusH0k= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5 h1:2M3HP5CCK1Si9FQhwnzYhXdG6DXeebvUHFpre8QvbyI= golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= @@ -361,6 +363,9 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod 
h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200927032502-5d4f70055728 h1:5wtQIAulKU5AbLQOkjxl32UufnIOqgBX72pS0AV14H0= +golang.org/x/net v0.0.0-20200927032502-5d4f70055728/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20201021035429-f5854403a974 h1:IX6qOQeG5uLjB/hjjwjedwfjND0hgjPMMyO1RoIXQNI= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -371,6 +376,7 @@ golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= diff --git a/loader/loader.go b/loader/loader.go index 79c740c73c..bc7763b537 100644 --- a/loader/loader.go +++ b/loader/loader.go @@ -89,6 +89,7 @@ type FileLoader interface { WithMetrics(m metrics.Metrics) FileLoader WithBundleVerificationConfig(*bundle.VerificationConfig) FileLoader WithSkipBundleVerification(skipVerify bool) FileLoader + WithProcessAnnotation(processAnnotation bool) FileLoader } // NewFileLoader returns a new FileLoader instance. 
@@ -107,11 +108,12 @@ type descriptor struct { } type fileLoader struct { - metrics metrics.Metrics - bvc *bundle.VerificationConfig - skipVerify bool - descriptors []*descriptor - files map[string]bundle.FileInfo + metrics metrics.Metrics + bvc *bundle.VerificationConfig + skipVerify bool + descriptors []*descriptor + files map[string]bundle.FileInfo + processAnnotation bool } // WithMetrics provides the metrics instance to use while loading @@ -132,6 +134,12 @@ func (fl *fileLoader) WithSkipBundleVerification(skipVerify bool) FileLoader { return fl } +// WithProcessAnnotation enables or disables processing of schema annotations on rules +func (fl *fileLoader) WithProcessAnnotation(processAnnotation bool) FileLoader { + fl.processAnnotation = processAnnotation + return fl +} + // All returns a Result object loaded (recursively) from the specified paths. func (fl fileLoader) All(paths []string) (*Result, error) { return fl.Filtered(paths, nil) @@ -148,7 +156,7 @@ func (fl fileLoader) Filtered(paths []string, filter Filter) (*Result, error) { return err } - result, err := loadKnownTypes(path, bs, fl.metrics) + result, err := loadKnownTypes(path, bs, fl.metrics, fl.processAnnotation) if err != nil { if !isUnrecognizedFile(err) { return err @@ -175,8 +183,11 @@ func (fl fileLoader) AsBundle(path string) (*bundle.Bundle, error) { return nil, err } - br := bundle.NewCustomReader(bundleLoader).WithMetrics(fl.metrics).WithBundleVerificationConfig(fl.bvc). - WithSkipBundleVerification(fl.skipVerify) + br := bundle.NewCustomReader(bundleLoader). + WithMetrics(fl.metrics). + WithBundleVerificationConfig(fl.bvc). + WithSkipBundleVerification(fl.skipVerify). + WithProcessAnnotations(fl.processAnnotation) // For bundle directories add the full path in front of module file names // to simplify debugging. 
@@ -456,12 +467,12 @@ func allRec(path string, filter Filter, errors *Errors, loaded *Result, depth in } } -func loadKnownTypes(path string, bs []byte, m metrics.Metrics) (interface{}, error) { +func loadKnownTypes(path string, bs []byte, m metrics.Metrics, processAnnotation bool) (interface{}, error) { switch filepath.Ext(path) { case ".json": return loadJSON(path, bs, m) case ".rego": - return loadRego(path, bs, m) + return loadRego(path, bs, m, processAnnotation) case ".yaml", ".yml": return loadYAML(path, bs, m) default: @@ -498,9 +509,17 @@ func loadBundleFile(path string, bs []byte, m metrics.Metrics) (bundle.Bundle, e return br.Read() } -func loadRego(path string, bs []byte, m metrics.Metrics) (*RegoFile, error) { +func loadRego(path string, bs []byte, m metrics.Metrics, parserOptions ...bool) (*RegoFile, error) { m.Timer(metrics.RegoModuleParse).Start() - module, err := ast.ParseModule(path, string(bs)) + var module *ast.Module + var err error + if len(parserOptions) == 1 { + module, err = ast.ParseModuleWithOpts(path, string(bs), ast.ParserOptions{ + ProcessAnnotation: parserOptions[0], + }) + } else { + module, err = ast.ParseModule(path, string(bs)) + } m.Timer(metrics.RegoModuleParse).Stop() if err != nil { return nil, err diff --git a/rego/rego.go b/rego/rego.go index 16eb3b16e9..526e57f6d0 100644 --- a/rego/rego.go +++ b/rego/rego.go @@ -1639,7 +1639,10 @@ func (r *Rego) loadFiles(ctx context.Context, txn storage.Transaction, m metrics m.Timer(metrics.RegoLoadFiles).Start() defer m.Timer(metrics.RegoLoadFiles).Stop() - result, err := loader.NewFileLoader().WithMetrics(m).Filtered(r.loadPaths.paths, r.loadPaths.filter) + result, err := loader.NewFileLoader(). + WithMetrics(m). + WithProcessAnnotation(r.schemaSet != nil). 
+ Filtered(r.loadPaths.paths, r.loadPaths.filter) if err != nil { return err } @@ -1665,7 +1668,11 @@ func (r *Rego) loadBundles(ctx context.Context, txn storage.Transaction, m metri defer m.Timer(metrics.RegoLoadBundles).Stop() for _, path := range r.bundlePaths { - bndl, err := loader.NewFileLoader().WithMetrics(m).WithSkipBundleVerification(r.skipBundleVerification).AsBundle(path) + bndl, err := loader.NewFileLoader(). + WithMetrics(m). + WithProcessAnnotation(r.schemaSet != nil). + WithSkipBundleVerification(r.skipBundleVerification). + AsBundle(path) if err != nil { return fmt.Errorf("loading error: %s", err) } @@ -1815,8 +1822,7 @@ func (r *Rego) compileQuery(query ast.Body, m metrics.Metrics, extras []extraSta WithPackage(pkg). WithImports(imports) - var qc ast.QueryCompiler - qc = r.compiler.QueryCompiler(). + qc := r.compiler.QueryCompiler(). WithContext(qctx). WithUnsafeBuiltins(r.unsafeBuiltins) diff --git a/rego/rego_test.go b/rego/rego_test.go index b0c1bc7e50..021592323d 100644 --- a/rego/rego_test.go +++ b/rego/rego_test.go @@ -1861,11 +1861,14 @@ func TestPrepareAndCompileWithSchema(t *testing.T) { var schema interface{} err := util.Unmarshal([]byte(schemaBytes), &schema) + schemaSet := ast.NewSchemaSet() + schemaSet.ByPath.Put(ast.InputRootRef, schema) + r := New( Query("data.test.x"), Module("", module), Package("foo"), - Schemas(&ast.SchemaSet{ByPath: map[string]interface{}{"input": schema}}), + Schemas(schemaSet), ) ctx := context.Background() diff --git a/types/types.go b/types/types.go index b364241ef9..d0e88eaa80 100644 --- a/types/types.go +++ b/types/types.go @@ -311,6 +311,16 @@ func (t *Object) DynamicValue() Type { return t.dynamic.Value } +// DynamicProperties returns the type of the object's dynamic elements. +func (t *Object) DynamicProperties() *DynamicProperty { + return t.dynamic +} + +// StaticProperties returns the type of the object's static elements. 
+func (t *Object) StaticProperties() []*StaticProperty { + return t.static +} + // Keys returns the keys of the object's static elements. func (t *Object) Keys() []interface{} { sl := make([]interface{}, 0, len(t.static))