Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use local definitions for k8s schema validation #20544

Merged
merged 3 commits into from
Dec 29, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,12 @@ repos:
files: ^chart/values\.schema\.json$|^chart/values_schema\.schema\.json$
require_serial: true
additional_dependencies: ['jsonschema==3.2.0', 'PyYAML==5.3.1', 'requests==2.25.0']
- id: vendor-k8s-json-schema
name: Vendor k8s definitions into values.schema.json
entry: ./scripts/ci/pre_commit/pre_commit_vendor_k8s_json_schema.py
language: python
files: ^chart/values\.schema\.json$
additional_dependencies: ['requests==2.25.0']
- id: json-schema
name: Lint chart/values.yaml file with JSON Schema
entry: ./scripts/ci/pre_commit/pre_commit_json_schema.py
Expand Down
3 changes: 2 additions & 1 deletion BREEZE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2209,7 +2209,8 @@ This is the current syntax for `./breeze <./breeze>`_:
pyupgrade restrict-start_date rst-backticks setup-order setup-extra-packages
shellcheck sort-in-the-wild sort-spelling-wordlist stylelint trailing-whitespace
ui-lint update-breeze-file update-extras update-local-yml-file update-setup-cfg-file
update-versions verify-db-migrations-documented version-sync www-lint yamllint yesqa
update-versions vendor-k8s-json-schema verify-db-migrations-documented version-sync
www-lint yamllint yesqa

You can pass extra arguments including options to the pre-commit framework as
<EXTRA_ARGS> passed after --. For example:
Expand Down
2 changes: 2 additions & 0 deletions STATIC_CODE_CHECKS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@ require Breeze Docker images to be installed locally.
------------------------------------ ---------------------------------------------------------------- ------------
``update-versions`` Updates latest versions in the documentation
------------------------------------ ---------------------------------------------------------------- ------------
``vendor-k8s-json-schema`` Vendor k8s schema definitions in the helm chart schema file
------------------------------------ ---------------------------------------------------------------- ------------
``verify-db-migrations-documented`` Verify DB Migrations have been documented
------------------------------------ ---------------------------------------------------------------- ------------
``www-lint`` Static checks of js in airflow/www/static/js/ folder
Expand Down
1 change: 1 addition & 0 deletions breeze-complete
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ update-extras
update-local-yml-file
update-setup-cfg-file
update-versions
vendor-k8s-json-schema
verify-db-migrations-documented
version-sync
www-lint
Expand Down
2,811 changes: 2,722 additions & 89 deletions chart/values.schema.json

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions scripts/ci/pre_commit/pre_commit_chart_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@
# The value of this parameter is passed to statsd_exporter, which does not have a strict type definition.
"$['properties']['statsd']['properties']['extraMappings']",
}
# Path prefixes of the vendored upstream k8s definitions; we don't want to check those.
VENDORED_PATHS = {"$['definitions']['io.k8s"}

# Parsed contents of values.schema.json under CHART_DIR; read once, when this module is loaded.
SCHEMA = json.loads((CHART_DIR / "values.schema.json").read_text())

Expand All @@ -57,6 +61,13 @@ def walk(value, path='$'):
yield from walk(v, path + f"[{no}]")


def is_vendored_path(path: str) -> bool:
    """Tell whether *path* points inside a vendored part of the schema.

    :param path: location string of a schema node, e.g.
        ``$['definitions']['io.k8s.api.core.v1.Pod']``
    :return: True if *path* starts with any prefix listed in ``VENDORED_PATHS``,
        False otherwise (including when ``VENDORED_PATHS`` is empty)
    """
    # str.startswith accepts a tuple of candidates, so no explicit loop is needed.
    return path.startswith(tuple(VENDORED_PATHS))


def validate_object_types():
all_object_types = ((d, p) for d, p in walk(SCHEMA) if type(d) == dict and d.get('type') == 'object')
all_object_types_with_a_loose_definition = [
Expand All @@ -66,6 +77,7 @@ def validate_object_types():
and "$ref" not in d
and type(d.get('additionalProperties')) != dict
and p not in KNOWN_INVALID_TYPES
and not is_vendored_path(p)
]
to_display_invalid_types = [
(d, p) for d, p in all_object_types_with_a_loose_definition if p not in KNOWN_INVALID_TYPES
Expand Down
87 changes: 87 additions & 0 deletions scripts/ci/pre_commit/pre_commit_vendor_k8s_json_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#!/usr/bin/env python3

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import json

import requests

# Upstream document holding the Kubernetes JSON schema definitions to vendor.
# NOTE(review): the URL goes through the ``master`` branch, so the fetched content is
# not pinned to a commit - confirm that is intended.
K8S_DEFINITIONS = (
    "https://raw.githubusercontent.com/yannh/kubernetes-json-schema"
    "/master/v1.22.0-standalone-strict/_definitions.json"
)
# Relative path: resolved against the current working directory of the process.
VALUES_SCHEMA_FILE = "chart/values.schema.json"


# Read as UTF-8 explicitly so parsing does not depend on the platform's locale encoding.
with open(VALUES_SCHEMA_FILE, encoding="utf-8") as f:
    schema = json.load(f)

# Every "$ref" target discovered so far; populated by find_refs().
refs = set()


def find_refs(props: dict):
    """Collect the ``$ref`` targets used by a mapping of schema nodes.

    Each discovered target is added to the module-level ``refs`` set. For every
    node in *props* this looks at:

    * a ``$ref`` directly on the node,
    * a ``$ref`` directly on the node's ``items`` or ``additionalProperties``
      sub-schema,
    * the node's own ``properties``, recursively.

    :param props: mapping of name -> schema node (e.g. a ``properties`` or
        ``definitions`` section)
    """
    for node in props.values():
        # Boolean schemas (or any other non-mapping value) cannot carry a "$ref".
        if not isinstance(node, dict):
            continue

        if "$ref" in node:
            refs.add(node["$ref"])

        # Array items and map values are sub-schemas that may be plain references;
        # skipping them would leave their targets out of the vendored definitions.
        for keyword in ("items", "additionalProperties"):
            sub_schema = node.get(keyword)
            if isinstance(sub_schema, dict) and "$ref" in sub_schema:
                refs.add(sub_schema["$ref"])

        if "properties" in node:
            find_refs(node["properties"])
jedcunningham marked this conversation as resolved.
Show resolved Hide resolved


def get_remote_schema(url: str, timeout: float = 30.0) -> dict:
    """Download the JSON document at *url* and return it parsed.

    :param url: address of the JSON document to fetch
    :param timeout: seconds to wait on the server before giving up; ``requests``
        applies no timeout by default, which could make the hook hang forever
    :return: the decoded JSON document
    :raises requests.HTTPError: if the server responds with an error status
    :raises requests.Timeout: if the server does not respond within *timeout*
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return json.loads(response.text)
jedcunningham marked this conversation as resolved.
Show resolved Hide resolved


# Fetch the upstream k8s definitions that we vendor from
defs = get_remote_schema(K8S_DEFINITIONS)

# Start with an empty 'definitions' section, whether or not one was there before
schema["definitions"] = {}

# Seed the set of refs with whatever our own properties point at
find_refs(schema["properties"])
jedcunningham marked this conversation as resolved.
Show resolved Hide resolved

# Copy referenced definitions in, pass after pass, until a pass turns up no new refs.
# The number of passes is bounded so a runaway chain of refs cannot loop forever.
for _ in range(16):
    known_refs = refs.copy()
    for ref in refs:
        definition_name = ref.split('/')[-1]
        schema["definitions"][definition_name] = defs["definitions"][definition_name]
    find_refs(schema["definitions"])
    if refs == known_refs:
        break
else:
    raise Exception("Wasn't able to find all nested references in 15 cycles")

# Keep the definitions ordered by name
schema["definitions"] = dict(sorted(schema["definitions"].items()))

# Write the updated schema back, ending the file with a newline
with open(VALUES_SCHEMA_FILE, 'w') as f:
    json.dump(schema, f, indent=4)
    f.write('\n')