Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] ✨ Create kube-state-metrics configuration from markers #9347

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
34 changes: 21 additions & 13 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ export GO111MODULE=on
#
# Kubebuilder.
#
export KUBEBUILDER_ENVTEST_KUBERNETES_VERSION ?= 1.28.0
export KUBEBUILDER_ENVTEST_KUBERNETES_VERSION ?= 1.27.1
export KUBEBUILDER_CONTROLPLANE_START_TIMEOUT ?= 60s
export KUBEBUILDER_CONTROLPLANE_STOP_TIMEOUT ?= 60s

Expand All @@ -64,6 +64,7 @@ CAPD_DIR := $(TEST_DIR)/infrastructure/docker
CAPIM_DIR := $(TEST_DIR)/infrastructure/inmemory
TEST_EXTENSION_DIR := $(TEST_DIR)/extension
GO_INSTALL := ./scripts/go_install.sh
GO_TOOLS_BUILD := ./hack/go-tools-build.sh
OBSERVABILITY_DIR := hack/observability

export PATH := $(abspath $(TOOLS_BIN_DIR)):$(PATH)
Expand Down Expand Up @@ -169,6 +170,12 @@ GOVULNCHECK_VER := v1.0.0
GOVULNCHECK := $(abspath $(TOOLS_BIN_DIR)/$(GOVULNCHECK_BIN)-$(GOVULNCHECK_VER))
GOVULNCHECK_PKG := golang.org/x/vuln/cmd/govulncheck

# kube-state-metrics is built from a fork: KUBE_STATE_METRICS_MOD_REPLACE maps the
# upstream module path (KUBE_STATE_METRICS_PKG) to github.com/chrischdi/kube-state-metrics/v2
# at the commit pinned in KUBE_STATE_METRICS_VER. The resulting binary path is
# version-suffixed, so changing the pinned commit changes the tool target's file name.
KUBE_STATE_METRICS_VER := e31ed9ab
KUBE_STATE_METRICS_BIN := kube-state-metrics
KUBE_STATE_METRICS := $(abspath $(TOOLS_BIN_DIR)/$(KUBE_STATE_METRICS_BIN)-$(KUBE_STATE_METRICS_VER))
KUBE_STATE_METRICS_PKG := k8s.io/kube-state-metrics/v2
KUBE_STATE_METRICS_MOD_REPLACE := $(KUBE_STATE_METRICS_PKG)=github.com/chrischdi/kube-state-metrics/v2@$(KUBE_STATE_METRICS_VER)

CONVERSION_VERIFIER_BIN := conversion-verifier
CONVERSION_VERIFIER := $(abspath $(TOOLS_BIN_DIR)/$(CONVERSION_VERIFIER_BIN))

Expand Down Expand Up @@ -556,18 +563,13 @@ generate-e2e-templates-main: $(KUSTOMIZE)
$(KUSTOMIZE) build $(INMEMORY_TEMPLATES)/main/cluster-template --load-restrictor LoadRestrictionsNone > $(INMEMORY_TEMPLATES)/main/cluster-template.yaml

.PHONY: generate-metrics-config
generate-metrics-config: $(ENVSUBST_BIN) ## Generate ./hack/observability/kube-state-metrics/crd-config.yaml
OUTPUT_FILE="${OBSERVABILITY_DIR}/kube-state-metrics/crd-config.yaml"; \
METRICS_DIR="${OBSERVABILITY_DIR}/kube-state-metrics/metrics"; \
echo "# This file was auto-generated via: make generate-metrics-config" > "$${OUTPUT_FILE}"; \
cat "$${METRICS_DIR}/header.yaml" >> "$${OUTPUT_FILE}"; \
for resource in clusterclass cluster kubeadmcontrolplane kubeadmconfig machine machinedeployment machinehealthcheck machineset machinepool; do \
cat "$${METRICS_DIR}/$${resource}.yaml"; \
RESOURCE="$${resource}" ${ENVSUBST_BIN} < "$${METRICS_DIR}/common_metrics.yaml"; \
if [[ "$${resource}" != "cluster" ]]; then \
cat "$${METRICS_DIR}/owner_metric.yaml"; \
fi \
done >> "$${OUTPUT_FILE}"; \
# Runs `kube-state-metrics generate` over the listed API packages and stores its
# stdout as the kube-state-metrics CRD configuration (presumably derived from the
# +Metrics markers on the API types — confirm against the generator).
# The output is written to a temporary file and only moved into place when the
# generator succeeds, so a failing run never leaves a truncated crd-config.yaml.
generate-metrics-config: $(KUBE_STATE_METRICS) ## Generate ./hack/observability/kube-state-metrics/crd-config.yaml
	$(KUBE_STATE_METRICS) generate \
		./api/... \
		./controlplane/kubeadm/api/... \
		./bootstrap/kubeadm/api/... \
		./exp/api/... \
		> "${OBSERVABILITY_DIR}/kube-state-metrics/crd-config.yaml.tmp" \
		|| { rm -f "${OBSERVABILITY_DIR}/kube-state-metrics/crd-config.yaml.tmp"; exit 1; }
	mv -f "${OBSERVABILITY_DIR}/kube-state-metrics/crd-config.yaml.tmp" "${OBSERVABILITY_DIR}/kube-state-metrics/crd-config.yaml"

.PHONY: generate-diagrams
generate-diagrams: ## Generate diagrams for *.plantuml files
Expand Down Expand Up @@ -1292,6 +1294,9 @@ $(GOLANGCI_LINT_BIN): $(GOLANGCI_LINT) ## Build a local copy of golangci-lint.
.PHONY: $(GOVULNCHECK_BIN)
$(GOVULNCHECK_BIN): $(GOVULNCHECK) ## Build a local copy of govulncheck.

.PHONY: $(KUBE_STATE_METRICS_BIN)
$(KUBE_STATE_METRICS_BIN): $(KUBE_STATE_METRICS) ## Build a local copy of kube-state-metrics.

$(CONTROLLER_GEN): # Build controller-gen from tools folder.
GOBIN=$(TOOLS_BIN_DIR) $(GO_INSTALL) $(CONTROLLER_GEN_PKG) $(CONTROLLER_GEN_BIN) $(CONTROLLER_GEN_VER)

Expand Down Expand Up @@ -1346,6 +1351,9 @@ $(GOLANGCI_LINT): # Build golangci-lint from tools folder.
$(GOVULNCHECK): # Build govulncheck.
GOBIN=$(TOOLS_BIN_DIR) $(GO_INSTALL) $(GOVULNCHECK_PKG) $(GOVULNCHECK_BIN) $(GOVULNCHECK_VER)

$(KUBE_STATE_METRICS): # Build kube-state-metrics at the pinned KUBE_STATE_METRICS_VER, passing the fork replace directive to the build script via GOMOD_REPLACE.
GOBIN=$(TOOLS_BIN_DIR) GOMOD_REPLACE="$(KUBE_STATE_METRICS_MOD_REPLACE)" $(GO_TOOLS_BUILD) $(KUBE_STATE_METRICS_PKG) $(KUBE_STATE_METRICS_BIN) $(KUBE_STATE_METRICS_VER)

## --------------------------------------
## Helpers
## --------------------------------------
Expand Down
15 changes: 15 additions & 0 deletions Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,19 @@ def deploy_observability():
objects = ["capi-visualizer:serviceaccount"],
)

# Deploys every kustomization named in the "deploy_kustomizations" list of the
# Tilt settings (no-op when the key is absent). For each name, the YAML read
# from ./.tiltbuild/yaml/<name>.kustomization.yaml is applied and all objects
# found in it are grouped into a single Tilt resource named after the
# kustomization and labeled "kustomization".
def deploy_kustomizations():
    for name in settings.get("deploy_kustomizations", []):
        yaml = read_file("./.tiltbuild/yaml/{}.kustomization.yaml".format(name))
        k8s_yaml(yaml)

        # Resolve the object names once and reuse them for the resource grouping.
        object_names = find_all_objects_names(decode_yaml_stream(yaml))
        k8s_resource(
            new_name = name,
            objects = object_names,
            labels = ["kustomization"],
        )

def prepare_all():
tools_arg = "--tools kustomize,envsubst,clusterctl "
tilt_settings_file_arg = "--tilt-settings-file " + tilt_file
Expand Down Expand Up @@ -640,6 +653,8 @@ deploy_provider_crds()

deploy_observability()

deploy_kustomizations()

enable_providers()

cluster_templates()
13 changes: 12 additions & 1 deletion api/v1beta1/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ const (
type ClusterSpec struct {
// Paused can be used to prevent controllers from processing the Cluster and all its associated objects.
// +optional
// +Metrics:gauge:name="spec_paused",help="Whether the cluster is paused and any of its resources will not be processed by the controllers.",nilIsZero=true
Paused bool `json:"paused,omitempty"`

// Cluster network configuration.
Expand Down Expand Up @@ -406,6 +407,7 @@ type ClusterStatus struct {
// Phase represents the current phase of cluster actuation.
// E.g. Pending, Running, Terminating, Failed etc.
// +optional
// +Metrics:stateset:name="status_phase",help="The clusters current phase.",labelName="phase",list={"Pending","Provisioning","Provisioned","Deleting","Failed","Unknown"}
Phase string `json:"phase,omitempty"`

// InfrastructureReady is the state of the infrastructure provider.
Expand All @@ -418,6 +420,8 @@ type ClusterStatus struct {

// Conditions defines current service state of the cluster.
// +optional
// +Metrics:stateset:name="status_condition",help="The condition of a cluster.",labelName="status",JSONPath=.status,list={"True","False","Unknown"},labelsFromPath={"type":".type"}
// +Metrics:gauge:name="status_condition_last_transition_time",help="The condition last transition time of a cluster.",valueFrom=.lastTransitionTime,labelsFromPath={"type":".type","status":".status"}
Conditions Conditions `json:"conditions,omitempty"`

// ObservedGeneration is the latest generation observed by the controller.
Expand Down Expand Up @@ -486,8 +490,15 @@ func (v APIEndpoint) String() string {
// +kubebuilder:printcolumn:name="Version",type="string",JSONPath=".spec.topology.version",description="Kubernetes version associated with this Cluster"

// Cluster is the Schema for the clusters API.
// +Metrics:gvk:namePrefix="capi_cluster"
// +Metrics:labelFromPath:name="name",JSONPath=.metadata.name
// +Metrics:labelFromPath:name="namespace",JSONPath=.metadata.namespace
// +Metrics:labelFromPath:name="uid",JSONPath=.metadata.uid
// +Metrics:info:name="info",help="Information about a cluster.",labelsFromPath={topology_version:".spec.topology.version",topology_class:".spec.topology.class",control_plane_endpoint_host:".spec.controlPlaneEndpoint.host",control_plane_endpoint_port:".spec.controlPlaneEndpoint.port",control_plane_reference_kind:".spec.controlPlaneRef.kind",control_plane_reference_name:".spec.controlPlaneRef.name",infrastructure_reference_kind:".spec.infrastructureRef.kind",infrastructure_reference_name:".spec.infrastructureRef.name"}
type Cluster struct {
metav1.TypeMeta `json:",inline"`
metav1.TypeMeta `json:",inline"`
// +Metrics:gauge:name="created",JSONPath=".creationTimestamp",help="Unix creation timestamp."
// +Metrics:info:name="annotation_paused",JSONPath=.annotations['cluster\.x-k8s\.io/paused'],help="Whether the cluster is paused and any of its resources will not be processed by the controllers.",labelsFromPath={paused_value:"."}
metav1.ObjectMeta `json:"metadata,omitempty"`

Spec ClusterSpec `json:"spec,omitempty"`
Expand Down
12 changes: 11 additions & 1 deletion api/v1beta1/clusterclass_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,16 @@ const ClusterClassKind = "ClusterClass"
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp",description="Time duration since creation of ClusterClass"

// ClusterClass is a template which can be used to create managed topologies.
// +Metrics:gvk:namePrefix="capi_clusterclass"
// +Metrics:labelFromPath:name="name",JSONPath=.metadata.name
// +Metrics:labelFromPath:name="namespace",JSONPath=.metadata.namespace
// +Metrics:labelFromPath:name="uid",JSONPath=.metadata.uid
// +Metrics:info:name="info",help="Information about a clusterclass.",labelsFromPath={name:.metadata.name}
type ClusterClass struct {
metav1.TypeMeta `json:",inline"`
metav1.TypeMeta `json:",inline"`
// +Metrics:gauge:name="created",JSONPath=".creationTimestamp",help="Unix creation timestamp."
// +Metrics:info:name="annotation_paused",JSONPath=.annotations['cluster\.x-k8s\.io/paused'],help="Whether the clusterclass is paused and any of its resources will not be processed by the controllers.",labelsFromPath={paused_value:"."}
// +Metrics:info:name="owner",JSONPath=".ownerReferences",help="Owner references.",labelsFromPath={owner_is_controller:".controller",owner_kind:".kind",owner_name:".name",owner_uid:".uid"}
metav1.ObjectMeta `json:"metadata,omitempty"`

Spec ClusterClassSpec `json:"spec,omitempty"`
Expand Down Expand Up @@ -647,6 +655,8 @@ type ClusterClassStatus struct {

// Conditions defines current observed state of the ClusterClass.
// +optional
// +Metrics:stateset:name="status_condition",help="The condition of a clusterclass.",labelName="status",JSONPath=.status,list={"True","False","Unknown"},labelsFromPath={"type":".type"}
// +Metrics:gauge:name="status_condition_last_transition_time",help="The condition last transition time of a clusterclass.",valueFrom=.lastTransitionTime,labelsFromPath={"type":".type","status":".status"}
Conditions Conditions `json:"conditions,omitempty"`

// ObservedGeneration is the latest generation observed by the controller.
Expand Down
17 changes: 16 additions & 1 deletion api/v1beta1/machine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ type MachineSpec struct {
type MachineStatus struct {
// NodeRef will point to the corresponding Node if it exists.
// +optional
// +Metrics:info:name="status_noderef",help="Information about the node reference of a machine.",labelsFromPath={node_name:".name",node_uid:".uid"}
NodeRef *corev1.ObjectReference `json:"nodeRef,omitempty"`

// NodeInfo is a set of ids/uuids to uniquely identify the node.
Expand Down Expand Up @@ -194,16 +195,19 @@ type MachineStatus struct {
// Addresses is a list of addresses assigned to the machine.
// This field is copied from the infrastructure provider reference.
// +optional
// +Metrics:info:name="addresses",help="Address information about a machine.",labelsFromPath={address:".address",type:".type"}
Addresses MachineAddresses `json:"addresses,omitempty"`

// Phase represents the current phase of machine actuation.
// E.g. Pending, Running, Terminating, Failed etc.
// +optional
// +Metrics:stateset:name="status_phase",help="The machines current phase.",labelName="phase",list={"Pending","Provisioning","Provisioned","Running","Deleting","Deleted","Failed","Unknown"}
Phase string `json:"phase,omitempty"`

// CertificatesExpiryDate is the expiry date of the machine certificates.
// This value is only set for control plane machines.
// +optional
// +Metrics:gauge:name="status_certificatesexpirydate",help="Information about certificate expiration date of a control plane node.",nilIsZero=true
CertificatesExpiryDate *metav1.Time `json:"certificatesExpiryDate,omitempty"`

// BootstrapReady is the state of the bootstrap provider.
Expand All @@ -220,6 +224,8 @@ type MachineStatus struct {

// Conditions defines current service state of the Machine.
// +optional
// +Metrics:stateset:name="status_condition",help="The condition of a machine.",labelName="status",JSONPath=".status",list={"True","False","Unknown"},labelsFromPath={"type":".type"}
// +Metrics:gauge:name="status_condition_last_transition_time",help="The condition last transition time of a machine.",valueFrom=.lastTransitionTime,labelsFromPath={"type":".type","status":".status"}
Conditions Conditions `json:"conditions,omitempty"`
}

Expand Down Expand Up @@ -279,8 +285,17 @@ type Bootstrap struct {
// +kubebuilder:printcolumn:name="Version",type="string",JSONPath=".spec.version",description="Kubernetes version associated with this Machine"

// Machine is the Schema for the machines API.
// +Metrics:gvk:namePrefix="capi_machine"
// +Metrics:labelFromPath:name="name",JSONPath=".metadata.name"
// +Metrics:labelFromPath:name="namespace",JSONPath=".metadata.namespace"
// +Metrics:labelFromPath:name="uid",JSONPath=".metadata.uid"
// +Metrics:labelFromPath:name="cluster_name",JSONPath=".spec.clusterName"
// +Metrics:info:name="info",help="Information about a machine.",labelsFromPath={bootstrap_reference_kind:.spec.bootstrap.configRef.kind,bootstrap_reference_name:.spec.bootstrap.configRef.name,container_runtime_version:.status.nodeInfo.containerRuntimeVersion,control_plane_name:.metadata.labels.cluster\.x-k8s\.io/control-plane-name,failure_domain:.spec.failureDomain,infrastructure_reference_kind:.spec.infrastructureRef.kind,infrastructure_reference_name:.spec.infrastructureRef.name,kernel_version:.status.nodeInfo.kernelVersion,kube_proxy_version:.status.nodeInfo.kubeProxyVersion,kubelet_version:.status.nodeInfo.kubeletVersion,os_image:.status.nodeInfo.osImage,provider_id:.spec.providerID,version:.spec.version}
type Machine struct {
metav1.TypeMeta `json:",inline"`
metav1.TypeMeta `json:",inline"`
// +Metrics:gauge:name="created",JSONPath=".creationTimestamp",help="Unix creation timestamp."
// +Metrics:info:name="annotation_paused",JSONPath=.annotations['cluster\.x-k8s\.io/paused'],help="Whether the machine is paused and any of its resources will not be processed by the controllers.",labelsFromPath={paused_value:"."}
// +Metrics:info:name="owner",JSONPath=".ownerReferences",help="Owner references.",labelsFromPath={owner_is_controller:".controller",owner_kind:".kind",owner_name:".name",owner_uid:".uid"}
metav1.ObjectMeta `json:"metadata,omitempty"`

Spec MachineSpec `json:"spec,omitempty"`
Expand Down
23 changes: 22 additions & 1 deletion api/v1beta1/machinedeployment_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ type MachineDeploymentSpec struct {
// * An existing MachineDeployment which initially wasn't controlled by the autoscaler
// should be later controlled by the autoscaler
// +optional
// +Metrics:gauge:name="spec_replicas",help="The number of desired machines for a machinedeployment."
Replicas *int32 `json:"replicas,omitempty"`

// RolloutAfter is a field to indicate a rollout should be performed
Expand Down Expand Up @@ -134,6 +135,7 @@ type MachineDeploymentSpec struct {

// Indicates that the deployment is paused.
// +optional
// +Metrics:gauge:name="spec_paused",help="Whether the machinedeployment is paused and any of its resources will not be processed by the controllers.",nilIsZero=true
Paused bool `json:"paused,omitempty"`

// The maximum time in seconds for a deployment to make progress before it
Expand Down Expand Up @@ -183,6 +185,7 @@ type MachineRollingUpdateDeployment struct {
// that the total number of machines available at all times
// during the update is at least 70% of desired machines.
// +optional
// +Metrics:gauge:name="spec_strategy_rollingupdate_max_unavailable",help="Maximum number of unavailable replicas during a rolling update of a machinedeployment."
MaxUnavailable *intstr.IntOrString `json:"maxUnavailable,omitempty"`

// The maximum number of machines that can be scheduled above the
Expand All @@ -199,6 +202,7 @@ type MachineRollingUpdateDeployment struct {
// be scaled up further, ensuring that total number of machines running
// at any time during the update is at most 130% of desired machines.
// +optional
// +Metrics:gauge:name="spec_strategy_rollingupdate_max_surge",help="Maximum number of replicas that can be scheduled above the desired number of replicas during a rolling update of a machinedeployment."
MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty"`

// DeletePolicy defines the policy used by the MachineDeployment to identify nodes to delete when downscaling.
Expand Down Expand Up @@ -228,20 +232,24 @@ type MachineDeploymentStatus struct {
// Total number of non-terminated machines targeted by this deployment
// (their labels match the selector).
// +optional
// +Metrics:gauge:name="status_replicas",help="The number of replicas per machinedeployment.",nilIsZero=true
Replicas int32 `json:"replicas"`

// Total number of non-terminated machines targeted by this deployment
// that have the desired template spec.
// +optional
// +Metrics:gauge:name="status_replicas_updated",help="The number of updated replicas per machinedeployment.",nilIsZero=true
UpdatedReplicas int32 `json:"updatedReplicas"`

// Total number of ready machines targeted by this deployment.
// +optional
// +Metrics:gauge:name="status_replicas_ready",help="The number of ready replicas per machinedeployment.",nilIsZero=true
ReadyReplicas int32 `json:"readyReplicas"`

// Total number of available machines (ready for at least minReadySeconds)
// targeted by this deployment.
// +optional
// +Metrics:gauge:name="status_replicas_available",help="The number of available replicas per machinedeployment.",nilIsZero=true
AvailableReplicas int32 `json:"availableReplicas"`

// Total number of unavailable machines targeted by this deployment.
Expand All @@ -250,14 +258,18 @@ type MachineDeploymentStatus struct {
// be machines that are running but not yet available or machines
// that still have not been created.
// +optional
// +Metrics:gauge:name="status_replicas_unavailable",help="The number of unavailable replicas per machinedeployment.",nilIsZero=true
UnavailableReplicas int32 `json:"unavailableReplicas"`

// Phase represents the current phase of a MachineDeployment (ScalingUp, ScalingDown, Running, Failed, or Unknown).
// +optional
// +Metrics:stateset:name="status_phase",help="The machinedeployments current phase.",labelName="phase",list={"ScalingUp","ScalingDown","Running","Failed","Unknown"}
Phase string `json:"phase,omitempty"`

// Conditions defines current service state of the MachineDeployment.
// +optional
// +Metrics:stateset:name="status_condition",help="The condition of a machinedeployment.",labelName="status",JSONPath=".status",list={"True","False","Unknown"},labelsFromPath={"type":".type"}
// +Metrics:gauge:name="status_condition_last_transition_time",help="The condition last transition time of a machinedeployment.",valueFrom=.lastTransitionTime,labelsFromPath={"type":".type","status":".status"}
Conditions Conditions `json:"conditions,omitempty"`
}

Expand Down Expand Up @@ -319,8 +331,17 @@ func (md *MachineDeploymentStatus) GetTypedPhase() MachineDeploymentPhase {
// +kubebuilder:printcolumn:name="Version",type="string",JSONPath=".spec.template.spec.version",description="Kubernetes version associated with this MachineDeployment"

// MachineDeployment is the Schema for the machinedeployments API.
// +Metrics:gvk:namePrefix="capi_machinedeployment"
// +Metrics:labelFromPath:name="name",JSONPath=".metadata.name"
// +Metrics:labelFromPath:name="namespace",JSONPath=".metadata.namespace"
// +Metrics:labelFromPath:name="uid",JSONPath=".metadata.uid"
// +Metrics:labelFromPath:name="cluster_name",JSONPath=".spec.clusterName"
// +Metrics:info:name="info",help="Information about a machinedeployment.",labelsFromPath={bootstrap_reference_kind:.spec.template.spec.bootstrap.configRef.kind,bootstrap_reference_name:.spec.template.spec.bootstrap.configRef.name,infrastructure_reference_kind:.spec.template.spec.infrastructureRef.kind,infrastructure_reference_name:.spec.template.spec.infrastructureRef.name,version:.spec.template.spec.version}
type MachineDeployment struct {
metav1.TypeMeta `json:",inline"`
metav1.TypeMeta `json:",inline"`
// +Metrics:gauge:name="created",JSONPath=".creationTimestamp",help="Unix creation timestamp."
// +Metrics:info:name="annotation_paused",JSONPath=.annotations['cluster\.x-k8s\.io/paused'],help="Whether the machinedeployment is paused and any of its resources will not be processed by the controllers.",labelsFromPath={paused_value:"."}
// +Metrics:info:name="owner",JSONPath=".ownerReferences",help="Owner references.",labelsFromPath={owner_is_controller:".controller",owner_kind:".kind",owner_name:".name",owner_uid:".uid"}
metav1.ObjectMeta `json:"metadata,omitempty"`

Spec MachineDeploymentSpec `json:"spec,omitempty"`
Expand Down
Loading