diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index e7c713d2c99df..61fa6b087ebbd 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +- [9315](https://github.com/grafana/loki/pull/9315) **aminesnow**: Add zone awareness spec to LokiStack - [9339](https://github.com/grafana/loki/pull/9339) **JoaoBraveCoding**: Add default PodAntiAffinity to Ruler - [9329](https://github.com/grafana/loki/pull/9329) **JoaoBraveCoding**: Add default PodAntiAffinity to Ingester - [9262](https://github.com/grafana/loki/pull/9262) **btaani**: Add PodDisruptionBudget to the Ruler diff --git a/operator/apis/loki/v1/lokistack_types.go b/operator/apis/loki/v1/lokistack_types.go index 4eeddec2b7a05..e0bf4139042fc 100644 --- a/operator/apis/loki/v1/lokistack_types.go +++ b/operator/apis/loki/v1/lokistack_types.go @@ -766,6 +766,7 @@ type LokiStackSpec struct { // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Cluster Proxy" Proxy *ClusterProxy `json:"proxy,omitempty"` + // Deprecated: Please use replication.factor instead. This field will be removed in future versions of this CRD. // ReplicationFactor defines the policy for log stream replication. // // +optional @@ -774,7 +775,14 @@ type LokiStackSpec struct { // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:number",displayName="Replication Factor" ReplicationFactor int32 `json:"replicationFactor,omitempty"` - // Rules defines the spec for the ruler component + // Replication defines the configuration for Loki data replication. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Replication Spec" + Replication *ReplicationSpec `json:"replication,omitempty"` + + // Rules defines the spec for the ruler component. 
// // +optional // +kubebuilder:validation:Optional @@ -788,7 +796,7 @@ type LokiStackSpec struct { // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced",displayName="Rate Limiting" Limits *LimitsSpec `json:"limits,omitempty"` - // Template defines the resource/limits/tolerations/nodeselectors per component + // Template defines the resource/limits/tolerations/nodeselectors per component. // // +optional // +kubebuilder:validation:Optional @@ -803,6 +811,40 @@ type LokiStackSpec struct { Tenants *TenantsSpec `json:"tenants,omitempty"` } +type ReplicationSpec struct { + // Factor defines the policy for log stream replication. + // + // +optional + // +kubebuilder:validation:Optional + // +kubebuilder:validation:Minimum:=1 + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:number",displayName="Replication Factor" + Factor int32 `json:"factor,omitempty"` + + // Zones defines an array of ZoneSpec that the scheduler will try to satisfy. + // + // +optional + // +kubebuilder:validation:Optional + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Zones Spec" + Zones []ZoneSpec `json:"zones,omitempty"` +} + +// ZoneSpec defines the spec to support zone-aware component deployments. +type ZoneSpec struct { + // MaxSkew describes the maximum degree to which Pods can be unevenly distributed. + // + // +required + // +kubebuilder:default:=1 + // +operator-sdk:csv:customresourcedefinitions:type=spec,xDescriptors="urn:alm:descriptor:com.tectonic.ui:number",displayName="Max Skew" + MaxSkew int `json:"maxSkew"` + + // TopologyKey is the key that defines a topology in the Nodes' labels. 
+ // + // +required + // +kubebuilder:validation:Required + // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Topology Key" + TopologyKey string `json:"topologyKey"` +} + // LokiStackConditionType deifnes the type of condition types of a Loki deployment. type LokiStackConditionType string diff --git a/operator/apis/loki/v1/v1.go b/operator/apis/loki/v1/v1.go index 219edcedd8320..4863cd693623b 100644 --- a/operator/apis/loki/v1/v1.go +++ b/operator/apis/loki/v1/v1.go @@ -57,6 +57,12 @@ var ( ErrSchemaRetroactivelyChanged = errors.New("Cannot retroactively change schema") // ErrHeaderAuthCredentialsConflict when both Credentials and CredentialsFile are used in a header authentication client. ErrHeaderAuthCredentialsConflict = errors.New("credentials and credentialsFile cannot be used at the same time") + // ErrReplicationZonesNodes when there is an error retrieving nodes with replication zones labels. + ErrReplicationZonesNodes = errors.New("Failed to retrieve nodes for zone replication") + // ErrReplicationFactorToZonesRatio when the replication factor defined is greater than the number of available zones. + ErrReplicationFactorToZonesRatio = errors.New("replication factor is greater than the number of available zones") + // ErrReplicationSpecConflict when both the ReplicationSpec and deprecated ReplicationFactor are used. + ErrReplicationSpecConflict = errors.New("replicationSpec and replicationFactor (deprecated) cannot be used at the same time") // ErrRuleMustMatchNamespace indicates that an expression used in an alerting or recording rule is missing // matchers for a namespace.
diff --git a/operator/apis/loki/v1/zz_generated.deepcopy.go b/operator/apis/loki/v1/zz_generated.deepcopy.go index d30a5b8bfeef0..9eba6833e012e 100644 --- a/operator/apis/loki/v1/zz_generated.deepcopy.go +++ b/operator/apis/loki/v1/zz_generated.deepcopy.go @@ -770,6 +770,11 @@ func (in *LokiStackSpec) DeepCopyInto(out *LokiStackSpec) { *out = new(ClusterProxy) **out = **in } + if in.Replication != nil { + in, out := &in.Replication, &out.Replication + *out = new(ReplicationSpec) + (*in).DeepCopyInto(*out) + } if in.Rules != nil { in, out := &in.Rules, &out.Rules *out = new(RulesSpec) @@ -1303,6 +1308,26 @@ func (in *RemoteWriteSpec) DeepCopy() *RemoteWriteSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ReplicationSpec) DeepCopyInto(out *ReplicationSpec) { + *out = *in + if in.Zones != nil { + in, out := &in.Zones, &out.Zones + *out = make([]ZoneSpec, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicationSpec. +func (in *ReplicationSpec) DeepCopy() *ReplicationSpec { + if in == nil { + return nil + } + out := new(ReplicationSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RetentionLimitSpec) DeepCopyInto(out *RetentionLimitSpec) { *out = *in @@ -1613,3 +1638,18 @@ func (in *TenantsSpec) DeepCopy() *TenantsSpec { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ZoneSpec) DeepCopyInto(out *ZoneSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ZoneSpec. 
+func (in *ZoneSpec) DeepCopy() *ZoneSpec { + if in == nil { + return nil + } + out := new(ZoneSpec) + in.DeepCopyInto(out) + return out +} diff --git a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml index 4494c360e97f4..56cf062946f42 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -438,12 +438,36 @@ spec: - description: NoProxy configures the NO_PROXY/no_proxy env variable. displayName: NoProxy path: proxy.noProxy - - description: ReplicationFactor defines the policy for log stream replication. + - description: Replication defines the configuration for Loki data replication. + displayName: Replication Spec + path: replication + - description: Factor defines the policy for log stream replication. + displayName: Replication Factor + path: replication.factor + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Zones defines an array of ZoneSpec that the scheduler will try + to satisfy. + displayName: Zones Spec + path: replication.zones + - description: MaxSkew describes the maximum degree to which Pods can be unevenly + distributed. + displayName: Max Skew + path: replication.zones[0].maxSkew + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: TopologyKey is the key that defines a topology in the Nodes' + labels. + displayName: Topology Key + path: replication.zones[0].topologyKey + - description: 'Deprecated: Please use replication.factor instead. This field + will be removed in future versions of this CRD. ReplicationFactor defines + the policy for log stream replication.' 
displayName: Replication Factor path: replicationFactor x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - - description: Rules defines the spec for the ruler component + - description: Rules defines the spec for the ruler component. displayName: Rules path: rules x-descriptors: @@ -514,7 +538,7 @@ spec: x-descriptors: - urn:alm:descriptor:io.kubernetes:StorageClass - description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. displayName: Node Placement path: template x-descriptors: diff --git a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml index 2fa9f521e9533..65b38988f4f8b 100644 --- a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -335,13 +335,45 @@ spec: description: NoProxy configures the NO_PROXY/no_proxy env variable. type: string type: object + replication: + description: Replication defines the configuration for Loki data replication. + properties: + factor: + description: Factor defines the policy for log stream replication. + format: int32 + minimum: 1 + type: integer + zones: + description: Zones defines an array of ZoneSpec that the scheduler + will try to satisfy. + items: + description: ZoneSpec defines the spec to support zone-aware + component deployments. + properties: + maxSkew: + default: 1 + description: MaxSkew describes the maximum degree to which + Pods can be unevenly distributed. + type: integer + topologyKey: + description: TopologyKey is the key that defines a topology + in the Nodes' labels. + type: string + required: + - maxSkew + - topologyKey + type: object + type: array + type: object replicationFactor: - description: ReplicationFactor defines the policy for log stream replication. 
+ description: 'Deprecated: Please use replication.factor instead. This + field will be removed in future versions of this CRD. ReplicationFactor + defines the policy for log stream replication.' format: int32 minimum: 1 type: integer rules: - description: Rules defines the spec for the ruler component + description: Rules defines the spec for the ruler component. properties: enabled: description: Enabled defines a flag to enable/disable the ruler @@ -530,7 +562,7 @@ spec: type: string template: description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. properties: compactor: description: Compactor defines the compaction component spec. diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index 4dda0d8186343..ad1c201ec1b05 100644 --- a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -438,12 +438,36 @@ spec: - description: NoProxy configures the NO_PROXY/no_proxy env variable. displayName: NoProxy path: proxy.noProxy - - description: ReplicationFactor defines the policy for log stream replication. + - description: Replication defines the configuration for Loki data replication. + displayName: Replication Spec + path: replication + - description: Factor defines the policy for log stream replication. + displayName: Replication Factor + path: replication.factor + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Zones defines an array of ZoneSpec that the scheduler will try + to satisfy. + displayName: Zones Spec + path: replication.zones + - description: MaxSkew describes the maximum degree to which Pods can be unevenly + distributed. 
+ displayName: Max Skew + path: replication.zones[0].maxSkew + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: TopologyKey is the key that defines a topology in the Nodes' + labels. + displayName: Topology Key + path: replication.zones[0].topologyKey + - description: 'Deprecated: Please use replication.factor instead. This field + will be removed in future versions of this CRD. ReplicationFactor defines + the policy for log stream replication.' displayName: Replication Factor path: replicationFactor x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - - description: Rules defines the spec for the ruler component + - description: Rules defines the spec for the ruler component. displayName: Rules path: rules x-descriptors: @@ -514,7 +538,7 @@ spec: x-descriptors: - urn:alm:descriptor:io.kubernetes:StorageClass - description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. displayName: Node Placement path: template x-descriptors: diff --git a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml index cde876b838388..2109df5b68b53 100644 --- a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml @@ -335,13 +335,45 @@ spec: description: NoProxy configures the NO_PROXY/no_proxy env variable. type: string type: object + replication: + description: Replication defines the configuration for Loki data replication. + properties: + factor: + description: Factor defines the policy for log stream replication. + format: int32 + minimum: 1 + type: integer + zones: + description: Zones defines an array of ZoneSpec that the scheduler + will try to satisfy. + items: + description: ZoneSpec defines the spec to support zone-aware + component deployments. 
+ properties: + maxSkew: + default: 1 + description: MaxSkew describes the maximum degree to which + Pods can be unevenly distributed. + type: integer + topologyKey: + description: TopologyKey is the key that defines a topology + in the Nodes' labels. + type: string + required: + - maxSkew + - topologyKey + type: object + type: array + type: object replicationFactor: - description: ReplicationFactor defines the policy for log stream replication. + description: 'Deprecated: Please use replication.factor instead. This + field will be removed in future versions of this CRD. ReplicationFactor + defines the policy for log stream replication.' format: int32 minimum: 1 type: integer rules: - description: Rules defines the spec for the ruler component + description: Rules defines the spec for the ruler component. properties: enabled: description: Enabled defines a flag to enable/disable the ruler @@ -530,7 +562,7 @@ spec: type: string template: description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. properties: compactor: description: Compactor defines the compaction component spec. diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index b165660cef34d..9c0816cb6f2ca 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: quay.io/openshift-logging/loki-operator:v0.1.0 - createdAt: "2023-04-26T13:24:45Z" + createdAt: "2023-05-02T10:32:20Z" description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. 
## Prerequisites and Requirements @@ -451,12 +451,36 @@ spec: - description: NoProxy configures the NO_PROXY/no_proxy env variable. displayName: NoProxy path: proxy.noProxy - - description: ReplicationFactor defines the policy for log stream replication. + - description: Replication defines the configuration for Loki data replication. + displayName: Replication Spec + path: replication + - description: Factor defines the policy for log stream replication. + displayName: Replication Factor + path: replication.factor + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Zones defines an array of ZoneSpec that the scheduler will try + to satisfy. + displayName: Zones Spec + path: replication.zones + - description: MaxSkew describes the maximum degree to which Pods can be unevenly + distributed. + displayName: Max Skew + path: replication.zones[0].maxSkew + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: TopologyKey is the key that defines a topology in the Nodes' + labels. + displayName: Topology Key + path: replication.zones[0].topologyKey + - description: 'Deprecated: Please use replication.factor instead. This field + will be removed in future versions of this CRD. ReplicationFactor defines + the policy for log stream replication.' displayName: Replication Factor path: replicationFactor x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - - description: Rules defines the spec for the ruler component + - description: Rules defines the spec for the ruler component. displayName: Rules path: rules x-descriptors: @@ -527,7 +551,7 @@ spec: x-descriptors: - urn:alm:descriptor:io.kubernetes:StorageClass - description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. 
displayName: Node Placement path: template x-descriptors: diff --git a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml index 7e58cfa685539..5e304b86bf270 100644 --- a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -335,13 +335,45 @@ spec: description: NoProxy configures the NO_PROXY/no_proxy env variable. type: string type: object + replication: + description: Replication defines the configuration for Loki data replication. + properties: + factor: + description: Factor defines the policy for log stream replication. + format: int32 + minimum: 1 + type: integer + zones: + description: Zones defines an array of ZoneSpec that the scheduler + will try to satisfy. + items: + description: ZoneSpec defines the spec to support zone-aware + component deployments. + properties: + maxSkew: + default: 1 + description: MaxSkew describes the maximum degree to which + Pods can be unevenly distributed. + type: integer + topologyKey: + description: TopologyKey is the key that defines a topology + in the Nodes' labels. + type: string + required: + - maxSkew + - topologyKey + type: object + type: array + type: object replicationFactor: - description: ReplicationFactor defines the policy for log stream replication. + description: 'Deprecated: Please use replication.factor instead. This + field will be removed in future versions of this CRD. ReplicationFactor + defines the policy for log stream replication.' format: int32 minimum: 1 type: integer rules: - description: Rules defines the spec for the ruler component + description: Rules defines the spec for the ruler component. 
properties: enabled: description: Enabled defines a flag to enable/disable the ruler @@ -530,7 +562,7 @@ spec: type: string template: description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. properties: compactor: description: Compactor defines the compaction component spec. diff --git a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml index 0e59d766a2e8d..d55aa81b29c2e 100644 --- a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml +++ b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml @@ -318,13 +318,45 @@ spec: description: NoProxy configures the NO_PROXY/no_proxy env variable. type: string type: object + replication: + description: Replication defines the configuration for Loki data replication. + properties: + factor: + description: Factor defines the policy for log stream replication. + format: int32 + minimum: 1 + type: integer + zones: + description: Zones defines an array of ZoneSpec that the scheduler + will try to satisfy. + items: + description: ZoneSpec defines the spec to support zone-aware + component deployments. + properties: + maxSkew: + default: 1 + description: MaxSkew describes the maximum degree to which + Pods can be unevenly distributed. + type: integer + topologyKey: + description: TopologyKey is the key that defines a topology + in the Nodes' labels. + type: string + required: + - maxSkew + - topologyKey + type: object + type: array + type: object replicationFactor: - description: ReplicationFactor defines the policy for log stream replication. + description: 'Deprecated: Please use replication.factor instead. This + field will be removed in future versions of this CRD. ReplicationFactor + defines the policy for log stream replication.' 
format: int32 minimum: 1 type: integer rules: - description: Rules defines the spec for the ruler component + description: Rules defines the spec for the ruler component. properties: enabled: description: Enabled defines a flag to enable/disable the ruler @@ -513,7 +545,7 @@ spec: type: string template: description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. properties: compactor: description: Compactor defines the compaction component spec. diff --git a/operator/config/manifests/community-openshift/bases/loki-operator.clusterserviceversion.yaml b/operator/config/manifests/community-openshift/bases/loki-operator.clusterserviceversion.yaml index 0eb945874e1bc..6ae0e63883804 100644 --- a/operator/config/manifests/community-openshift/bases/loki-operator.clusterserviceversion.yaml +++ b/operator/config/manifests/community-openshift/bases/loki-operator.clusterserviceversion.yaml @@ -351,12 +351,36 @@ spec: - description: NoProxy configures the NO_PROXY/no_proxy env variable. displayName: NoProxy path: proxy.noProxy - - description: ReplicationFactor defines the policy for log stream replication. + - description: Replication defines the configuration for Loki data replication. + displayName: Replication Spec + path: replication + - description: Factor defines the policy for log stream replication. + displayName: Replication Factor + path: replication.factor + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Zones defines an array of ZoneSpec that the scheduler will try + to satisfy. + displayName: Zones Spec + path: replication.zones + - description: MaxSkew describes the maximum degree to which Pods can be unevenly + distributed. + displayName: Max Skew + path: replication.zones[0].maxSkew + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: TopologyKey is the key that defines a topology in the Nodes' + labels. 
+ displayName: Topology Key + path: replication.zones[0].topologyKey + - description: 'Deprecated: Please use replication.factor instead. This field + will be removed in future versions of this CRD. ReplicationFactor defines + the policy for log stream replication.' displayName: Replication Factor path: replicationFactor x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - - description: Rules defines the spec for the ruler component + - description: Rules defines the spec for the ruler component. displayName: Rules path: rules x-descriptors: @@ -427,7 +451,7 @@ spec: x-descriptors: - urn:alm:descriptor:io.kubernetes:StorageClass - description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. displayName: Node Placement path: template x-descriptors: diff --git a/operator/config/manifests/community/bases/loki-operator.clusterserviceversion.yaml b/operator/config/manifests/community/bases/loki-operator.clusterserviceversion.yaml index 2f441271cbf99..a1194efee4f14 100644 --- a/operator/config/manifests/community/bases/loki-operator.clusterserviceversion.yaml +++ b/operator/config/manifests/community/bases/loki-operator.clusterserviceversion.yaml @@ -351,12 +351,36 @@ spec: - description: NoProxy configures the NO_PROXY/no_proxy env variable. displayName: NoProxy path: proxy.noProxy - - description: ReplicationFactor defines the policy for log stream replication. + - description: Replication defines the configuration for Loki data replication. + displayName: Replication Spec + path: replication + - description: Factor defines the policy for log stream replication. + displayName: Replication Factor + path: replication.factor + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Zones defines an array of ZoneSpec that the scheduler will try + to satisfy. 
+ displayName: Zones Spec + path: replication.zones + - description: MaxSkew describes the maximum degree to which Pods can be unevenly + distributed. + displayName: Max Skew + path: replication.zones[0].maxSkew + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: TopologyKey is the key that defines a topology in the Nodes' + labels. + displayName: Topology Key + path: replication.zones[0].topologyKey + - description: 'Deprecated: Please use replication.factor instead. This field + will be removed in future versions of this CRD. ReplicationFactor defines + the policy for log stream replication.' displayName: Replication Factor path: replicationFactor x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - - description: Rules defines the spec for the ruler component + - description: Rules defines the spec for the ruler component. displayName: Rules path: rules x-descriptors: @@ -427,7 +451,7 @@ spec: x-descriptors: - urn:alm:descriptor:io.kubernetes:StorageClass - description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. displayName: Node Placement path: template x-descriptors: diff --git a/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml b/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml index 565abac2d9e73..beeb299074274 100644 --- a/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml +++ b/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml @@ -363,12 +363,36 @@ spec: - description: NoProxy configures the NO_PROXY/no_proxy env variable. displayName: NoProxy path: proxy.noProxy - - description: ReplicationFactor defines the policy for log stream replication. + - description: Replication defines the configuration for Loki data replication. 
+ displayName: Replication Spec + path: replication + - description: Factor defines the policy for log stream replication. + displayName: Replication Factor + path: replication.factor + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: Zones defines an array of ZoneSpec that the scheduler will try + to satisfy. + displayName: Zones Spec + path: replication.zones + - description: MaxSkew describes the maximum degree to which Pods can be unevenly + distributed. + displayName: Max Skew + path: replication.zones[0].maxSkew + x-descriptors: + - urn:alm:descriptor:com.tectonic.ui:number + - description: TopologyKey is the key that defines a topology in the Nodes' + labels. + displayName: Topology Key + path: replication.zones[0].topologyKey + - description: 'Deprecated: Please use replication.factor instead. This field + will be removed in future versions of this CRD. ReplicationFactor defines + the policy for log stream replication.' displayName: Replication Factor path: replicationFactor x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - - description: Rules defines the spec for the ruler component + - description: Rules defines the spec for the ruler component. displayName: Rules path: rules x-descriptors: @@ -439,7 +463,7 @@ spec: x-descriptors: - urn:alm:descriptor:io.kubernetes:StorageClass - description: Template defines the resource/limits/tolerations/nodeselectors - per component + per component. displayName: Node Placement path: template x-descriptors: diff --git a/operator/docs/operator/api.md b/operator/docs/operator/api.md index d504a742dd7a5..7aaa5e4bca670 100644 --- a/operator/docs/operator/api.md +++ b/operator/docs/operator/api.md @@ -1602,7 +1602,15 @@ are degraded or the cluster cannot connect to object storage.

Description -

"1x.extra-small"

+

"1x.demo"

+

SizeOneXDemo defines the size of a single Loki deployment +with tiny resource requirements and without HA support. +This size is intended to run in single-node clusters on laptops, +it is only useful for very light testing, demonstrations, or prototypes. +There are no ingestion/query performance guarantees. +DO NOT USE THIS IN PRODUCTION!

+ +

"1x.extra-small"

SizeOneXExtraSmall defines the size of a single Loki deployment with extra small resources/limits requirements and without HA support. This size is ultimately dedicated for development and demo purposes. @@ -1729,7 +1737,22 @@ int32 (Optional) -

ReplicationFactor defines the policy for log stream replication.

+

Deprecated: Please use replication.factor instead. This field will be removed in future versions of this CRD. +ReplicationFactor defines the policy for log stream replication.

+ + + + +replication
+ + +ReplicationSpec + + + + +(Optional) +

Replication defines the configuration for Loki data replication.

@@ -1743,7 +1766,7 @@ RulesSpec (Optional) -

Rules defines the spec for the ruler component

+

Rules defines the spec for the ruler component.

@@ -1771,7 +1794,7 @@ LokiTemplateSpec (Optional) -

Template defines the resource/limits/tolerations/nodeselectors per component

+

Template defines the resource/limits/tolerations/nodeselectors per component.

@@ -2346,7 +2369,10 @@ string Description -

"azure"

+

"alibabacloud"

+

ObjectStorageSecretAlibabaCloud when using AlibabaCloud OSS for Loki storage

+ +

"azure"

ObjectStorageSecretAzure when using Azure for Loki storage

"gcs"

@@ -3266,6 +3292,49 @@ RemoteWriteClientQueueSpec +## ReplicationSpec { #loki-grafana-com-v1-ReplicationSpec } +

+(Appears on:LokiStackSpec) +

+
+
+ + + + + + + + + + + + + + + + + +
FieldDescription
+factor
+ +int32 + +
+(Optional) +

Factor defines the policy for log stream replication.

+
+zones
+ + +[]ZoneSpec + + +
+(Optional) +

Zones defines an array of ZoneSpec that the scheduler will try to satisfy.

+
+ ## RetentionLimitSpec { #loki-grafana-com-v1-RetentionLimitSpec }

(Appears on:LimitsTemplateSpec) @@ -3897,6 +3966,46 @@ AuthorizationSpec + +## ZoneSpec { #loki-grafana-com-v1-ZoneSpec } +

+(Appears on:ReplicationSpec) +

+
+

ZoneSpec defines the spec to support zone-aware component deployments.

+
+ + + + + + + + + + + + + + + + + +
FieldDescription
+maxSkew
+ +int + +
+

MaxSkew describes the maximum degree to which Pods can be unevenly distributed.

+
+topologyKey
+ +string + +
+

TopologyKey is the key that defines a topology in the Nodes’ labels.

+

diff --git a/operator/internal/handlers/lokistack_create_or_update.go b/operator/internal/handlers/lokistack_create_or_update.go index cb057280726da..c18d634b3f179 100644 --- a/operator/internal/handlers/lokistack_create_or_update.go +++ b/operator/internal/handlers/lokistack_create_or_update.go @@ -367,6 +367,7 @@ func CreateOrUpdateLokiStack( depAnnotations, err := dependentAnnotations(ctx, k, obj) if err != nil { + l.Error(err, "failed to set dependent annotations") return err } diff --git a/operator/internal/manifests/build_test.go b/operator/internal/manifests/build_test.go index 25eda861ff3d1..6b5dae8a67636 100644 --- a/operator/internal/manifests/build_test.go +++ b/operator/internal/manifests/build_test.go @@ -45,6 +45,7 @@ func TestApplyUserOptions_OverrideDefaults(t *testing.T) { require.Equal(t, defs.Size, opt.Stack.Size) require.Equal(t, defs.Limits, opt.Stack.Limits) require.Equal(t, defs.ReplicationFactor, opt.Stack.ReplicationFactor) + require.Equal(t, defs.Replication, opt.Stack.Replication) require.Equal(t, defs.ManagementState, opt.Stack.ManagementState) require.Equal(t, defs.Template.Ingester, opt.Stack.Template.Ingester) require.Equal(t, defs.Template.Querier, opt.Stack.Template.Querier) diff --git a/operator/internal/manifests/config.go b/operator/internal/manifests/config.go index 1bec942540bf0..4cc36623085e8 100644 --- a/operator/internal/manifests/config.go +++ b/operator/internal/manifests/config.go @@ -103,6 +103,16 @@ func ConfigOptions(opt Options) config.Options { protocol = "https" } + // nolint:staticcheck + // Handle the deprecated field opt.Stack.ReplicationFactor. 
+ if (opt.Stack.Replication == nil || opt.Stack.Replication.Factor == 0) && opt.Stack.ReplicationFactor > 0 { + if opt.Stack.Replication == nil { + opt.Stack.Replication = &lokiv1.ReplicationSpec{} + } + + opt.Stack.Replication.Factor = opt.Stack.ReplicationFactor + } + return config.Options{ Stack: opt.Stack, Gates: opt.Gates, diff --git a/operator/internal/manifests/config_test.go b/operator/internal/manifests/config_test.go index f2b7cb129e97e..6b0a77df058ca 100644 --- a/operator/internal/manifests/config_test.go +++ b/operator/internal/manifests/config_test.go @@ -6,14 +6,16 @@ import ( "testing" "github.com/google/uuid" - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" - "github.com/grafana/loki/operator/internal/manifests" - "github.com/grafana/loki/operator/internal/manifests/internal/config" - "github.com/grafana/loki/operator/internal/manifests/openshift" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" "k8s.io/utils/pointer" + + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "github.com/grafana/loki/operator/internal/manifests" + "github.com/grafana/loki/operator/internal/manifests/internal/config" + "github.com/grafana/loki/operator/internal/manifests/openshift" ) func TestConfigMap_ReturnsSHA1OfBinaryContents(t *testing.T) { @@ -46,10 +48,12 @@ func randomConfigOptions() manifests.Options { Namespace: uuid.New().String(), Image: uuid.New().String(), Stack: lokiv1.LokiStackSpec{ - Size: lokiv1.SizeOneXExtraSmall, - Storage: lokiv1.ObjectStorageSpec{}, - StorageClassName: uuid.New().String(), - ReplicationFactor: rand.Int31(), + Size: lokiv1.SizeOneXExtraSmall, + Storage: lokiv1.ObjectStorageSpec{}, + StorageClassName: uuid.New().String(), + Replication: &lokiv1.ReplicationSpec{ + Factor: rand.Int31(), + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -1041,3 +1045,97 @@ func 
TestConfigOptions_RulerOverrides_OCPUserWorkloadOnlyEnabled(t *testing.T) { }) } } + +func TestConfigOptions_Replication(t *testing.T) { + tt := []struct { + desc string + spec lokiv1.LokiStackSpec + wantOptions lokiv1.ReplicationSpec + }{ + { + desc: "nominal case", + spec: lokiv1.LokiStackSpec{ + Replication: &lokiv1.ReplicationSpec{ + Factor: 2, + Zones: []lokiv1.ZoneSpec{ + { + TopologyKey: "us-east-1a", + MaxSkew: 1, + }, + { + TopologyKey: "us-east-1b", + MaxSkew: 2, + }, + }, + }, + }, + wantOptions: lokiv1.ReplicationSpec{ + Factor: 2, + Zones: []lokiv1.ZoneSpec{ + { + TopologyKey: "us-east-1a", + MaxSkew: 1, + }, + { + TopologyKey: "us-east-1b", + MaxSkew: 2, + }, + }, + }, + }, + { + desc: "using deprecated ReplicationFactor", + spec: lokiv1.LokiStackSpec{ + ReplicationFactor: 3, + }, + wantOptions: lokiv1.ReplicationSpec{ + Factor: 3, + }, + }, + { + desc: "using deprecated ReplicationFactor with ReplicationSpec", + spec: lokiv1.LokiStackSpec{ + ReplicationFactor: 2, + Replication: &lokiv1.ReplicationSpec{ + Factor: 4, + Zones: []lokiv1.ZoneSpec{ + { + TopologyKey: "us-east-1a", + MaxSkew: 1, + }, + { + TopologyKey: "us-east-1b", + MaxSkew: 2, + }, + }, + }, + }, + wantOptions: lokiv1.ReplicationSpec{ + Factor: 4, + Zones: []lokiv1.ZoneSpec{ + { + TopologyKey: "us-east-1a", + MaxSkew: 1, + }, + { + TopologyKey: "us-east-1b", + MaxSkew: 2, + }, + }, + }, + }, + } + + for _, tc := range tt { + tc := tc + t.Run(tc.desc, func(t *testing.T) { + t.Parallel() + + inOpt := manifests.Options{ + Stack: tc.spec, + } + options := manifests.ConfigOptions(inOpt) + require.Equal(t, tc.wantOptions, *options.Stack.Replication) + }) + } +} diff --git a/operator/internal/manifests/distributor.go b/operator/internal/manifests/distributor.go index d099ad1314137..a60902610a57d 100644 --- a/operator/internal/manifests/distributor.go +++ b/operator/internal/manifests/distributor.go @@ -127,6 +127,10 @@ func NewDistributorDeployment(opts Options) *appsv1.Deployment { 
podSpec.NodeSelector = opts.Stack.Template.Distributor.NodeSelector } + if opts.Stack.Replication != nil { + podSpec.TopologySpreadConstraints = topologySpreadConstraints(*opts.Stack.Replication) + } + return &appsv1.Deployment{ TypeMeta: metav1.TypeMeta{ Kind: "Deployment", diff --git a/operator/internal/manifests/distributor_test.go b/operator/internal/manifests/distributor_test.go index ca336099a8353..04513e50ce2bb 100644 --- a/operator/internal/manifests/distributor_test.go +++ b/operator/internal/manifests/distributor_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" @@ -98,3 +99,43 @@ func TestBuildDistributor_PodDisruptionBudget(t *testing.T) { require.Equal(t, int32(1), pdb.Spec.MinAvailable.IntVal) require.EqualValues(t, manifests.ComponentLabels(manifests.LabelDistributorComponent, opts.Name), pdb.Spec.Selector.MatchLabels) } + +func TestNewDistributorDeployment_TopologySpreadConstraints(t *testing.T) { + depl := manifests.NewDistributorDeployment(manifests.Options{ + Name: "abcd", + Namespace: "efgh", + Stack: lokiv1.LokiStackSpec{ + Template: &lokiv1.LokiTemplateSpec{ + Distributor: &lokiv1.LokiComponentSpec{ + Replicas: 1, + }, + }, + Replication: &lokiv1.ReplicationSpec{ + Zones: []lokiv1.ZoneSpec{ + { + TopologyKey: "zone", + MaxSkew: 3, + }, + { + TopologyKey: "region", + MaxSkew: 2, + }, + }, + Factor: 1, + }, + }, + }) + + require.Equal(t, []corev1.TopologySpreadConstraint{ + { + MaxSkew: 3, + TopologyKey: "zone", + WhenUnsatisfiable: "DoNotSchedule", + }, + { + MaxSkew: 2, + TopologyKey: "region", + WhenUnsatisfiable: "DoNotSchedule", + }, + }, depl.Spec.Template.Spec.TopologySpreadConstraints) +} diff --git a/operator/internal/manifests/indexgateway.go b/operator/internal/manifests/indexgateway.go index b8c4b2ebe6841..17aaf77131460 100644 --- a/operator/internal/manifests/indexgateway.go +++ 
b/operator/internal/manifests/indexgateway.go @@ -133,6 +133,10 @@ func NewIndexGatewayStatefulSet(opts Options) *appsv1.StatefulSet { podSpec.NodeSelector = opts.Stack.Template.IndexGateway.NodeSelector } + if opts.Stack.Replication != nil { + podSpec.TopologySpreadConstraints = topologySpreadConstraints(*opts.Stack.Replication) + } + return &appsv1.StatefulSet{ TypeMeta: metav1.TypeMeta{ Kind: "StatefulSet", diff --git a/operator/internal/manifests/indexgateway_test.go b/operator/internal/manifests/indexgateway_test.go index e69e7b64d54d9..5c1eb1b12cb6f 100644 --- a/operator/internal/manifests/indexgateway_test.go +++ b/operator/internal/manifests/indexgateway_test.go @@ -3,10 +3,12 @@ package manifests_test import ( "testing" - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" - "github.com/grafana/loki/operator/internal/manifests" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" + + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "github.com/grafana/loki/operator/internal/manifests" ) func TestNewIndexGatewayStatefulSet_HasTemplateConfigHashAnnotation(t *testing.T) { @@ -103,3 +105,43 @@ func TestBuildIndexGateway_PodDisruptionBudget(t *testing.T) { require.EqualValues(t, manifests.ComponentLabels(manifests.LabelIndexGatewayComponent, opts.Name), pdb.Spec.Selector.MatchLabels) } + +func TestNewIndexGatewayStatefulSet_TopologySpreadConstraints(t *testing.T) { + depl := manifests.NewIndexGatewayStatefulSet(manifests.Options{ + Name: "abcd", + Namespace: "efgh", + Stack: lokiv1.LokiStackSpec{ + Template: &lokiv1.LokiTemplateSpec{ + IndexGateway: &lokiv1.LokiComponentSpec{ + Replicas: 1, + }, + }, + Replication: &lokiv1.ReplicationSpec{ + Zones: []lokiv1.ZoneSpec{ + { + TopologyKey: "zone", + MaxSkew: 3, + }, + { + TopologyKey: "region", + MaxSkew: 2, + }, + }, + Factor: 1, + }, + }, + }) + + require.Equal(t, []corev1.TopologySpreadConstraint{ + { + MaxSkew: 3, + TopologyKey: "zone", + 
WhenUnsatisfiable: "DoNotSchedule", + }, + { + MaxSkew: 2, + TopologyKey: "region", + WhenUnsatisfiable: "DoNotSchedule", + }, + }, depl.Spec.Template.Spec.TopologySpreadConstraints) +} diff --git a/operator/internal/manifests/ingester.go b/operator/internal/manifests/ingester.go index 673e913a74c31..d845f327b1365 100644 --- a/operator/internal/manifests/ingester.go +++ b/operator/internal/manifests/ingester.go @@ -143,6 +143,10 @@ func NewIngesterStatefulSet(opts Options) *appsv1.StatefulSet { podSpec.NodeSelector = opts.Stack.Template.Ingester.NodeSelector } + if opts.Stack.Replication != nil { + podSpec.TopologySpreadConstraints = topologySpreadConstraints(*opts.Stack.Replication) + } + return &appsv1.StatefulSet{ TypeMeta: metav1.TypeMeta{ Kind: "StatefulSet", diff --git a/operator/internal/manifests/ingester_test.go b/operator/internal/manifests/ingester_test.go index d2d5955d51d1a..e72f3773100a3 100644 --- a/operator/internal/manifests/ingester_test.go +++ b/operator/internal/manifests/ingester_test.go @@ -5,13 +5,11 @@ import ( "testing" "github.com/stretchr/testify/require" - policyv1 "k8s.io/api/policy/v1" - corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "github.com/grafana/loki/operator/apis/config/v1" - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" "github.com/grafana/loki/operator/internal/manifests" "github.com/grafana/loki/operator/internal/manifests/internal" @@ -138,6 +136,46 @@ func TestBuildIngester_PodDisruptionBudget(t *testing.T) { } } +func TestNewIngesterStatefulSet_TopologySpreadConstraints(t *testing.T) { + ss := manifests.NewIngesterStatefulSet(manifests.Options{ + Name: "abcd", + Namespace: "efgh", + Stack: lokiv1.LokiStackSpec{ + Template: &lokiv1.LokiTemplateSpec{ + Ingester: &lokiv1.LokiComponentSpec{ + Replicas: 1, + }, + }, + Replication: &lokiv1.ReplicationSpec{ + Zones: []lokiv1.ZoneSpec{ + { + TopologyKey: "zone", + MaxSkew: 2, + }, + { + TopologyKey: 
"region", + MaxSkew: 1, + }, + }, + Factor: 1, + }, + }, + }) + + require.Equal(t, []corev1.TopologySpreadConstraint{ + { + MaxSkew: 2, + TopologyKey: "zone", + WhenUnsatisfiable: "DoNotSchedule", + }, + { + MaxSkew: 1, + TopologyKey: "region", + WhenUnsatisfiable: "DoNotSchedule", + }, + }, ss.Spec.Template.Spec.TopologySpreadConstraints) +} + func TestIngesterPodAntiAffinity(t *testing.T) { sts := manifests.NewIngesterStatefulSet(manifests.Options{ Name: "abcd", diff --git a/operator/internal/manifests/internal/config/build_test.go b/operator/internal/manifests/internal/config/build_test.go index 239be8b718091..78c8a64da6b58 100644 --- a/operator/internal/manifests/internal/config/build_test.go +++ b/operator/internal/manifests/internal/config/build_test.go @@ -3,11 +3,12 @@ package config import ( "testing" + "github.com/stretchr/testify/require" + "k8s.io/utils/pointer" + configv1 "github.com/grafana/loki/operator/apis/config/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" "github.com/grafana/loki/operator/internal/manifests/storage" - "github.com/stretchr/testify/require" - "k8s.io/utils/pointer" ) func TestBuild_ConfigAndRuntimeConfig_NoRuntimeConfigGenerated(t *testing.T) { @@ -174,7 +175,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -420,7 +423,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -525,7 +530,9 @@ overrides: func TestBuild_ConfigAndRuntimeConfig_CreateLokiConfigFailed(t *testing.T) { opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: 
&lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -815,7 +822,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -1157,7 +1166,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -1513,7 +1524,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -1837,7 +1850,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -2217,7 +2232,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -2598,7 +2615,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -2978,7 +2997,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: 
&lokiv1.IngestionLimitSpec{ @@ -3352,7 +3373,9 @@ overrides: ` opts := Options{ Stack: lokiv1.LokiStackSpec{ - ReplicationFactor: 1, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ diff --git a/operator/internal/manifests/internal/config/loki-config.yaml b/operator/internal/manifests/internal/config/loki-config.yaml index 7cbec377d4749..a2db6e3a6eeb8 100644 --- a/operator/internal/manifests/internal/config/loki-config.yaml +++ b/operator/internal/manifests/internal/config/loki-config.yaml @@ -113,7 +113,7 @@ ingester: join_after: 30s num_tokens: 512 ring: - replication_factor: {{ .Stack.ReplicationFactor }} + replication_factor: {{ .Stack.Replication.Factor }} max_transfer_retries: 0 wal: enabled: true diff --git a/operator/internal/manifests/internal/sizes.go b/operator/internal/manifests/internal/sizes.go index 247d05064b3f3..5623da4cb7e99 100644 --- a/operator/internal/manifests/internal/sizes.go +++ b/operator/internal/manifests/internal/sizes.go @@ -227,8 +227,10 @@ var ResourceRequirementsTable = map[lokiv1.LokiStackSizeType]ComponentResources{ // StackSizeTable defines the default configurations for each size var StackSizeTable = map[lokiv1.LokiStackSizeType]lokiv1.LokiStackSpec{ lokiv1.SizeOneXDemo: { - Size: lokiv1.SizeOneXDemo, - ReplicationFactor: 1, + Size: lokiv1.SizeOneXDemo, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -277,8 +279,10 @@ var StackSizeTable = map[lokiv1.LokiStackSizeType]lokiv1.LokiStackSpec{ }, }, lokiv1.SizeOneXExtraSmall: { - Size: lokiv1.SizeOneXExtraSmall, - ReplicationFactor: 1, + Size: lokiv1.SizeOneXExtraSmall, + Replication: &lokiv1.ReplicationSpec{ + Factor: 1, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -328,8 
+332,10 @@ var StackSizeTable = map[lokiv1.LokiStackSizeType]lokiv1.LokiStackSpec{ }, lokiv1.SizeOneXSmall: { - Size: lokiv1.SizeOneXSmall, - ReplicationFactor: 2, + Size: lokiv1.SizeOneXSmall, + Replication: &lokiv1.ReplicationSpec{ + Factor: 2, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ @@ -381,8 +387,10 @@ var StackSizeTable = map[lokiv1.LokiStackSizeType]lokiv1.LokiStackSpec{ }, lokiv1.SizeOneXMedium: { - Size: lokiv1.SizeOneXMedium, - ReplicationFactor: 3, + Size: lokiv1.SizeOneXMedium, + Replication: &lokiv1.ReplicationSpec{ + Factor: 3, + }, Limits: &lokiv1.LimitsSpec{ Global: &lokiv1.LimitsTemplateSpec{ IngestionLimits: &lokiv1.IngestionLimitSpec{ diff --git a/operator/internal/manifests/mutate.go b/operator/internal/manifests/mutate.go index 6a0fff77a2f9a..fbf234f94fa40 100644 --- a/operator/internal/manifests/mutate.go +++ b/operator/internal/manifests/mutate.go @@ -138,6 +138,14 @@ func mergeWithOverride(dst, src interface{}) error { return nil } +func mergeWithOverrideEmpty(dst, src interface{}) error { + err := mergo.Merge(dst, src, mergo.WithOverwriteWithEmptyValue) + if err != nil { + return kverrors.Wrap(err, "unable to mergeWithOverrideEmpty", "dst", dst, "src", src) + } + return nil +} + func mutateConfigMap(existing, desired *corev1.ConfigMap) { existing.Annotations = desired.Annotations existing.Labels = desired.Labels @@ -224,7 +232,7 @@ func mutateDeployment(existing, desired *appsv1.Deployment) error { existing.Spec.Selector = desired.Spec.Selector } existing.Spec.Replicas = desired.Spec.Replicas - if err := mergeWithOverride(&existing.Spec.Template, desired.Spec.Template); err != nil { + if err := mergeWithOverrideEmpty(&existing.Spec.Template, desired.Spec.Template); err != nil { return err } if err := mergeWithOverride(&existing.Spec.Strategy, desired.Spec.Strategy); err != nil { @@ -240,7 +248,7 @@ func mutateStatefulSet(existing, desired *appsv1.StatefulSet) error { 
existing.Spec.Selector = desired.Spec.Selector } existing.Spec.Replicas = desired.Spec.Replicas - if err := mergeWithOverride(&existing.Spec.Template, desired.Spec.Template); err != nil { + if err := mergeWithOverrideEmpty(&existing.Spec.Template, desired.Spec.Template); err != nil { return err } return nil diff --git a/operator/internal/manifests/querier.go b/operator/internal/manifests/querier.go index ae8693e2e2e30..c97ba3d9026b8 100644 --- a/operator/internal/manifests/querier.go +++ b/operator/internal/manifests/querier.go @@ -133,6 +133,10 @@ func NewQuerierDeployment(opts Options) *appsv1.Deployment { podSpec.NodeSelector = opts.Stack.Template.Querier.NodeSelector } + if opts.Stack.Replication != nil { + podSpec.TopologySpreadConstraints = topologySpreadConstraints(*opts.Stack.Replication) + } + return &appsv1.Deployment{ TypeMeta: metav1.TypeMeta{ Kind: "Deployment", diff --git a/operator/internal/manifests/querier_test.go b/operator/internal/manifests/querier_test.go index 8342b79b02c0c..38967a3ba91cc 100644 --- a/operator/internal/manifests/querier_test.go +++ b/operator/internal/manifests/querier_test.go @@ -3,12 +3,14 @@ package manifests_test import ( "testing" - lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" - "github.com/grafana/loki/operator/internal/manifests" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" + "github.com/grafana/loki/operator/internal/manifests" ) func TestNewQuerierDeployment_HasTemplateConfigHashAnnotation(t *testing.T) { @@ -183,3 +185,43 @@ func TestBuildQuerier_PodDisruptionBudget(t *testing.T) { }) } } + +func TestNewQuerierDeployment_TopologySpreadConstraints(t *testing.T) { + depl := manifests.NewQuerierDeployment(manifests.Options{ + Name: "abcd", + Namespace: "efgh", + Stack: lokiv1.LokiStackSpec{ + Template: 
&lokiv1.LokiTemplateSpec{ + Querier: &lokiv1.LokiComponentSpec{ + Replicas: 1, + }, + }, + Replication: &lokiv1.ReplicationSpec{ + Zones: []lokiv1.ZoneSpec{ + { + TopologyKey: "zone", + MaxSkew: 2, + }, + { + TopologyKey: "region", + MaxSkew: 1, + }, + }, + Factor: 1, + }, + }, + }) + + require.Equal(t, []corev1.TopologySpreadConstraint{ + { + MaxSkew: 2, + TopologyKey: "zone", + WhenUnsatisfiable: "DoNotSchedule", + }, + { + MaxSkew: 1, + TopologyKey: "region", + WhenUnsatisfiable: "DoNotSchedule", + }, + }, depl.Spec.Template.Spec.TopologySpreadConstraints) +} diff --git a/operator/internal/manifests/query-frontend.go b/operator/internal/manifests/query-frontend.go index 331775003037f..c042cc4d1823a 100644 --- a/operator/internal/manifests/query-frontend.go +++ b/operator/internal/manifests/query-frontend.go @@ -139,6 +139,10 @@ func NewQueryFrontendDeployment(opts Options) *appsv1.Deployment { podSpec.NodeSelector = opts.Stack.Template.QueryFrontend.NodeSelector } + if opts.Stack.Replication != nil { + podSpec.TopologySpreadConstraints = topologySpreadConstraints(*opts.Stack.Replication) + } + return &appsv1.Deployment{ TypeMeta: metav1.TypeMeta{ Kind: "Deployment", diff --git a/operator/internal/manifests/query-frontend_test.go b/operator/internal/manifests/query-frontend_test.go index e078befb03d44..783cd028f83a1 100644 --- a/operator/internal/manifests/query-frontend_test.go +++ b/operator/internal/manifests/query-frontend_test.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" @@ -92,3 +93,43 @@ func TestBuildQueryFrontend_PodDisruptionBudget(t *testing.T) { require.Equal(t, int32(1), pdb.Spec.MinAvailable.IntVal) require.EqualValues(t, ComponentLabels(LabelQueryFrontendComponent, opts.Name), pdb.Spec.Selector.MatchLabels) } + +func TestNewQueryFrontendDeployment_TopologySpreadConstraints(t *testing.T) { + depl := 
NewQueryFrontendDeployment(Options{ + Name: "abcd", + Namespace: "efgh", + Stack: lokiv1.LokiStackSpec{ + Template: &lokiv1.LokiTemplateSpec{ + QueryFrontend: &lokiv1.LokiComponentSpec{ + Replicas: 1, + }, + }, + Replication: &lokiv1.ReplicationSpec{ + Zones: []lokiv1.ZoneSpec{ + { + TopologyKey: "zone", + MaxSkew: 1, + }, + { + TopologyKey: "region", + MaxSkew: 2, + }, + }, + Factor: 1, + }, + }, + }) + + require.Equal(t, []corev1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: "zone", + WhenUnsatisfiable: "DoNotSchedule", + }, + { + MaxSkew: 2, + TopologyKey: "region", + WhenUnsatisfiable: "DoNotSchedule", + }, + }, depl.Spec.Template.Spec.TopologySpreadConstraints) +} diff --git a/operator/internal/manifests/var.go b/operator/internal/manifests/var.go index 28e2e0c7417d0..56e7b1a99b375 100644 --- a/operator/internal/manifests/var.go +++ b/operator/internal/manifests/var.go @@ -11,6 +11,7 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/pointer" + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" "github.com/grafana/loki/operator/internal/manifests/openshift" ) @@ -141,6 +142,22 @@ func serviceAnnotations(serviceName string, enableSigningService bool) map[strin return annotations } +func topologySpreadConstraints(spec lokiv1.ReplicationSpec) []corev1.TopologySpreadConstraint { + var tsc []corev1.TopologySpreadConstraint + if len(spec.Zones) > 0 { + tsc = make([]corev1.TopologySpreadConstraint, len(spec.Zones)) + for i, z := range spec.Zones { + tsc[i] = corev1.TopologySpreadConstraint{ + MaxSkew: int32(z.MaxSkew), + TopologyKey: z.TopologyKey, + WhenUnsatisfiable: corev1.DoNotSchedule, + } + } + } + + return tsc +} + // ComponentLabels is a list of all commonLabels including the app.kubernetes.io/component: label func ComponentLabels(component, stackName string) labels.Set { return labels.Merge(commonLabels(stackName), map[string]string{ diff --git a/operator/internal/validation/lokistack_test.go 
b/operator/internal/validation/lokistack_test.go index 37a787bc3f45b..67678720e8a6d 100644 --- a/operator/internal/validation/lokistack_test.go +++ b/operator/internal/validation/lokistack_test.go @@ -4,9 +4,10 @@ import ( "context" "testing" + "github.com/stretchr/testify/require" + lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" "github.com/grafana/loki/operator/internal/validation" - "github.com/stretchr/testify/require" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/operator/main.go b/operator/main.go index 08bae850f4e08..de2284d278093 100644 --- a/operator/main.go +++ b/operator/main.go @@ -8,6 +8,7 @@ import ( "github.com/ViaQ/logerr/v2/kverrors" "github.com/ViaQ/logerr/v2/log" + "github.com/grafana/loki/operator/internal/validation" "github.com/grafana/loki/operator/internal/validation/openshift"