Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

operator: Add support for built-in-cert-rotation for all internal lokistack encryption #7064

Merged
merged 23 commits into from
Nov 3, 2022
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
0b864dc
operator: Add support for built-in-cert-rotation for all internal lok…
periklis Aug 31, 2022
3ed86b5
Fix built-in ca and cert expiry check handler
periklis Oct 20, 2022
88d5662
Fix expiry checks
periklis Oct 21, 2022
191c0cd
Fix certrotation apply settings
periklis Oct 24, 2022
57d0840
Refactor cert creator
periklis Oct 25, 2022
bbcf3a4
Apply code review suggestions
periklis Oct 25, 2022
74043dc
Eliminate extra flag for refreshOnlyWhenExpired
periklis Oct 25, 2022
7590783
Add watcher for secrects in lokistack controller
periklis Oct 25, 2022
e952de2
Make certrotation requeues depending on smallest refresh flag
periklis Oct 26, 2022
311b755
Fix formatting
periklis Oct 26, 2022
21902cb
Refactor cert creators
periklis Oct 26, 2022
f91bff9
Refactor certrotation package
periklis Oct 26, 2022
559a6ef
Cleanup
periklis Oct 26, 2022
c1a71c8
Eliminate multiple time.ParseDuration for rotation options
periklis Oct 26, 2022
d56d8d3
Use cert refresh for requeue
periklis Oct 26, 2022
dabb1e4
Rename RotationOptions to Rotation
periklis Oct 26, 2022
3264457
Naming improvements
periklis Oct 26, 2022
68134bc
Apply code review suggestions
periklis Oct 27, 2022
102fc70
Naming improvements
periklis Oct 27, 2022
9b4b1cd
Apply code review suggestions
periklis Oct 27, 2022
06dc940
Update operator/internal/handlers/internal/serviceaccounts/serviceacc…
periklis Nov 2, 2022
36b44fc
Fix upgrade from serviceCA on OpenShift
periklis Nov 2, 2022
eb71085
Fix upgrade servicemonitors
periklis Nov 2, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion operator/apis/config/v1/projectconfig_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,30 @@ import (
cfg "sigs.k8s.io/controller-runtime/pkg/config/v1alpha1"
)

// BuiltInCertManagement is the configuration for the built-in facility to generate and rotate
// TLS client and serving certificates for all LokiStack services and internal clients except
// for the lokistack-gateway.
//
// All durations are given as strings, e.g. `43830h` for a CA validity of five years
// or `2160h` for a certificate validity of 90 days.
type BuiltInCertManagement struct {
	// Enabled is the flag to enable/disable the built-in certificate management feature gate.
	Enabled bool `json:"enabled,omitempty"`
	// CACertValidity defines the total duration of the CA certificate validity.
	CACertValidity string `json:"caValidity,omitempty"`
	// CACertRefresh defines the duration of the CA certificate validity until a rotation
	// should happen. It can be set up to 80% of the CA certificate validity or equal to the
	// CA certificate validity. The latter should be used only to rotate when expired.
	CACertRefresh string `json:"caRefresh,omitempty"`
	// CertValidity defines the total duration of the validity for all LokiStack certificates.
	CertValidity string `json:"certValidity,omitempty"`
	// CertRefresh defines the duration of the certificate validity until a rotation
	// should happen. It can be set up to 80% of the certificate validity or equal to the
	// certificate validity. The latter should be used only to rotate when expired.
	// The refresh is applied to all LokiStack certificates at once.
	CertRefresh string `json:"certRefresh,omitempty"`
}

// OpenShiftFeatureGates is the supported set of all operator features gates on OpenShift.
type OpenShiftFeatureGates struct {
// ServingCertsService enables OpenShift service-ca annotations on Services
// ServingCertsService enables OpenShift service-ca annotations on the lokistack-gateway service only
// to use the in-platform CA and generate a TLS cert/key pair per service for
// in-cluster data-in-transit encryption.
// More details: https://docs.openshift.com/container-platform/latest/security/certificate_types_descriptions/service-ca-certificates.html
Expand Down Expand Up @@ -54,6 +75,17 @@ type FeatureGates struct {
// suffix `-ca-bundle`, e.g. `lokistack-dev-ca-bundle` and the following data:
// - `service-ca.crt`: The CA signing the service certificate in `tls.crt`.
GRPCEncryption bool `json:"grpcEncryption,omitempty"`
// BuiltInCertManagement enables the built-in facility for generating and rotating
// TLS client and serving certificates for all LokiStack services and internal clients except
// for the lokistack-gateway. In detail, all internal Loki HTTP and GRPC communication is lifted
// to require mTLS. For the lokistack-gateway you need to either use the `ServingCertsService`
// on OpenShift or provide a secret with the following data:
// - `tls.crt`: The TLS server side certificate.
// - `tls.key`: The TLS key for server-side encryption.
// In addition each service requires a configmap named as the LokiStack CR with the
// suffix `-ca-bundle`, e.g. `lokistack-dev-ca-bundle` and the following data:
// - `service-ca.crt`: The CA signing the service certificate in `tls.crt`.
BuiltInCertManagement BuiltInCertManagement `json:"builtInCertManagement,omitempty"`

// LokiStackGateway enables reconciling the reverse-proxy lokistack-gateway
// component for multi-tenant authentication/authorization traffic control
Expand Down
16 changes: 16 additions & 0 deletions operator/apis/config/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions operator/apis/loki/v1/lokistack_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,8 @@ const (
ReasonInvalidTenantsConfiguration LokiStackConditionReason = "InvalidTenantsConfiguration"
// ReasonMissingGatewayOpenShiftBaseDomain when the reconciler cannot lookup the OpenShift DNS base domain.
ReasonMissingGatewayOpenShiftBaseDomain LokiStackConditionReason = "MissingGatewayOpenShiftBaseDomain"
// ReasonFailedCertificateRotation when the reconciler cannot rotate any of the required TLS certificates.
ReasonFailedCertificateRotation LokiStackConditionReason = "FailedCertificateRotation"
)

// PodStatusMap defines the type for mapping pod status to pod name.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@ data:
#
httpEncryption: true
grpcEncryption: true
builtInCertManagement:
enabled: true
# CA certificate validity: 5 years
caValidity: 43830h
# CA certificate refresh at 80% of validity
caRefresh: 35064h
# Target certificate validity: 90d
certValidity: 2160h
# Target certificate refresh at 80% of validity
certRefresh: 1728h
#
# Component feature gates
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,7 @@ spec:
- endpoints
- nodes
- pods
- secrets
- serviceaccounts
- services
verbs:
Expand Down
9 changes: 6 additions & 3 deletions operator/cmd/loki-broker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ func (c *config) registerFlags(f *flag.FlagSet) {
f.StringVar(&c.Namespace, "namespace", "", "Namespace to deploy to")
f.StringVar(&c.Image, "image", manifests.DefaultContainerImage, "The Loki image pull spec loation.")
// Feature flags
c.featureFlags = configv1.FeatureGates{}
c.featureFlags.OpenShift = configv1.OpenShiftFeatureGates{}
periklis marked this conversation as resolved.
Show resolved Hide resolved
f.BoolVar(&c.featureFlags.OpenShift.ServingCertsService, "with-serving-certs-service", false, "Enable usage of serving certs service on OpenShift.")
f.BoolVar(&c.featureFlags.ServiceMonitors, "with-service-monitors", false, "Enable service monitors for all LokiStack components.")
f.BoolVar(&c.featureFlags.OpenShift.ServingCertsService, "with-serving-certs-service", false, "Enable usage of serving certs service on OpenShift.")
f.BoolVar(&c.featureFlags.BuiltInCertManagement.Enabled, "with-builtin-cert-management", false, "Enable usage built-in cert generation and rotation.")
f.StringVar(&c.featureFlags.BuiltInCertManagement.CACertValidity, "ca-cert-validity", "8760h", "CA Certificate validity duration.")
f.StringVar(&c.featureFlags.BuiltInCertManagement.CACertRefresh, "ca-cert-refresh", "7008h", "CA Certificate refresh time.")
f.StringVar(&c.featureFlags.BuiltInCertManagement.CertValidity, "target-cert-validity", "2160h", "Target Certificate validity duration.")
f.StringVar(&c.featureFlags.BuiltInCertManagement.CertRefresh, "target-cert-refresh", "1728h", "Target Certificate refresh time.")
f.BoolVar(&c.featureFlags.HTTPEncryption, "with-http-tls-services", false, "Enables TLS for all LokiStack GRPC services.")
f.BoolVar(&c.featureFlags.GRPCEncryption, "with-grpc-tls-services", false, "Enables TLS for all LokiStack HTTP services.")
f.BoolVar(&c.featureFlags.ServiceMonitorTLSEndpoints, "with-service-monitor-tls-endpoints", false, "Enable TLS endpoint for service monitors.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ spec:
- name: manager
env:
- name: RELATED_IMAGE_LOKI
value: docker.io/grafana/loki:main-ec0bf70
value: docker.io/grafana/loki:k120-26d2989
- name: RELATED_IMAGE_GATEWAY
value: quay.io/observatorium/api:latest
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ featureGates:
#
httpEncryption: true
grpcEncryption: true
builtInCertManagement:
enabled: true
# CA certificate validity: 5 years
caValidity: 43830h
# CA certificate refresh at 80% of validity
caRefresh: 35064h
# Target certificate validity: 90d
certValidity: 2160h
# Target certificate refresh at 80% of validity
certRefresh: 1728h
#
# Component feature gates
#
Expand Down
1 change: 1 addition & 0 deletions operator/config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ rules:
- endpoints
- nodes
- pods
- secrets
- serviceaccounts
- services
verbs:
Expand Down
113 changes: 113 additions & 0 deletions operator/controllers/loki/certrotation_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package controllers

import (
"context"
"errors"
"time"

"github.com/go-logr/logr"
configv1 "github.com/grafana/loki/operator/apis/config/v1"
lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
"github.com/grafana/loki/operator/controllers/loki/internal/lokistack"
"github.com/grafana/loki/operator/controllers/loki/internal/management/state"
"github.com/grafana/loki/operator/internal/certrotation"
"github.com/grafana/loki/operator/internal/external/k8s"
"github.com/grafana/loki/operator/internal/handlers"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// CertRotationReconciler reconciles the `loki.grafana.com/certRotationRequiredAt` annotation on
// any LokiStack object associated with any of the owned signer/client/serving certificates secrets
// and CA bundle configmap.
type CertRotationReconciler struct {
// Client is the embedded Kubernetes client. Reconcile hands it to the managed-state
// check, the certificate expiry check and the LokiStack annotation update.
client.Client
// Log is the logger used for all reconciliation messages.
Log logr.Logger
// Scheme is the runtime scheme of the manager.
// NOTE(review): not read by any method visible in this file — confirm whether it is still needed.
Scheme *runtime.Scheme
// FeatureGates carries the operator feature gates; its BuiltInCertManagement section
// provides the rotation settings parsed on every reconciliation.
FeatureGates configv1.FeatureGates
}

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
// Compare the state specified by the LokiStack object against the actual cluster state,
// and then perform operations to make the cluster state reflect the state specified by
// the user.
//
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.0/pkg/reconcile
func (r *CertRotationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
managed, err := state.IsManaged(ctx, req, r.Client)
if err != nil {
return ctrl.Result{
Requeue: true,
}, err
}
if !managed {
r.Log.Info("Skipping reconciliation for unmanaged LokiStack resource", "name", req.String())
// Stop requeueing for unmanaged LokiStack custom resources
return ctrl.Result{}, nil
}

rt, err := certrotation.ParseRotation(r.FeatureGates.BuiltInCertManagement)
if err != nil {
return ctrl.Result{Requeue: false}, err
}

checkExpiryAfter := expiryRetryAfter(rt.TargetCertRefresh)
r.Log.Info("Checking if LokiStack certificates expired", "name", req.String(), "interval", checkExpiryAfter.String())

var expired *certrotation.CertExpiredError

err = handlers.CheckCertExpiry(ctx, r.Log, req, r.Client, r.FeatureGates)
switch {
case errors.As(err, &expired):
r.Log.Info("Certificate expired", "msg", expired.Error())
case err != nil:
return ctrl.Result{
Requeue: true,
}, err
default:
r.Log.Info("Skipping cert rotation, all LokiStack certificates still valid", "name", req.String())
return ctrl.Result{
RequeueAfter: checkExpiryAfter,
}, nil
}

r.Log.Error(err, "LokiStack certificates expired", "name", req.String())
periklis marked this conversation as resolved.
Show resolved Hide resolved
err = lokistack.AnnotateForRequiredCertRotation(ctx, r.Client, req.Name, req.Namespace)
if err != nil {
r.Log.Error(err, "failed to annotate required cert rotation", "name", req.String())
periklis marked this conversation as resolved.
Show resolved Hide resolved
return ctrl.Result{
Requeue: true,
}, err
}

return ctrl.Result{
RequeueAfter: checkExpiryAfter,
}, nil
}

// SetupWithManager registers this reconciler with the given manager by wrapping
// the manager's controller builder and delegating to buildController.
func (r *CertRotationReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return r.buildController(k8s.NewCtrlBuilder(ctrl.NewControllerManagedBy(mgr)))
}

// buildController wires the reconciler into the given builder: LokiStack is the
// primary resource and Secrets are registered as owned resources.
func (r *CertRotationReconciler) buildController(bld k8s.Builder) error {
	forLokiStack := bld.For(&lokiv1.LokiStack{})
	withOwnedSecrets := forLokiStack.Owns(&corev1.Secret{})

	return withOwnedSecrets.Complete(r)
}

// expiryRetryAfter returns the interval after which the certificate expiry check
// is repeated. Refresh durations longer than one day are checked every twelve
// hours; anything up to and including one day is checked at a quarter of the
// refresh duration.
func expiryRetryAfter(certRefresh time.Duration) time.Duration {
	const (
		oneDay      = 24 * time.Hour
		maxInterval = 12 * time.Hour
	)

	if certRefresh <= oneDay {
		return certRefresh / 4
	}

	return maxInterval
}
74 changes: 74 additions & 0 deletions operator/controllers/loki/certrotation_controller_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package controllers

import (
"testing"
"time"

lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
"github.com/grafana/loki/operator/internal/external/k8s/k8sfakes"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
)

// TestCertRotationController_RegistersCustomResource_WithDefaultPredicates verifies that
// building the controller registers the LokiStack custom resource exactly once.
func TestCertRotationController_RegistersCustomResource_WithDefaultPredicates(t *testing.T) {
	builder := &k8sfakes.FakeBuilder{}
	builder.ForReturns(builder)
	builder.OwnsReturns(builder)

	reconciler := &CertRotationReconciler{
		Client: &k8sfakes.FakeClient{},
		Scheme: scheme,
	}
	require.NoError(t, reconciler.buildController(builder))

	// The custom resource must be registered through exactly one For-call
	require.Equal(t, 1, builder.ForCallCount())

	// and that call must carry the LokiStack resource
	registered, _ := builder.ForArgsForCall(0)
	require.Equal(t, &lokiv1.LokiStack{}, registered)
}

// TestCertRotationController_RegisterOwnedResources_WithDefaultPredicates verifies that
// building the controller registers Secrets as the only owned resource.
func TestCertRotationController_RegisterOwnedResources_WithDefaultPredicates(t *testing.T) {
	builder := &k8sfakes.FakeBuilder{}
	builder.ForReturns(builder)
	builder.OwnsReturns(builder)

	reconciler := &CertRotationReconciler{
		Client: &k8sfakes.FakeClient{},
		Scheme: scheme,
	}
	require.NoError(t, reconciler.buildController(builder))

	// Exactly one owned resource type is expected
	require.Equal(t, 1, builder.OwnsCallCount())

	owned, _ := builder.OwnsArgsForCall(0)
	require.Equal(t, &corev1.Secret{}, owned)
}

// TestCertRotationController_ExpiryRetryAfter verifies the expiry re-check interval
// derived from the certificate refresh duration, including the one-day boundary.
func TestCertRotationController_ExpiryRetryAfter(t *testing.T) {
	tt := []struct {
		desc         string
		refresh      time.Duration
		wantDuration time.Duration
	}{
		{
			desc:         "multi-day refresh duration",
			refresh:      120 * time.Hour,
			wantDuration: 12 * time.Hour,
		},
		{
			// A refresh of exactly one day is not "more than a day" and is quartered.
			desc:         "exactly one day refresh duration",
			refresh:      24 * time.Hour,
			wantDuration: 6 * time.Hour,
		},
		{
			desc:         "less than a day refresh duration",
			refresh:      10 * time.Hour,
			wantDuration: 2*time.Hour + 30*time.Minute,
		},
	}
	for _, tc := range tt {
		tc := tc
		t.Run(tc.desc, func(t *testing.T) {
			t.Parallel()
			require.Equal(t, tc.wantDuration, expiryRetryAfter(tc.refresh))
		})
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package lokistack

import (
"context"
"fmt"
"time"

"github.com/ViaQ/logerr/v2/kverrors"
lokiv1 "github.com/grafana/loki/operator/apis/loki/v1"
"github.com/grafana/loki/operator/internal/external/k8s"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"sigs.k8s.io/controller-runtime/pkg/client"
)

const certRotationRequiredAtKey = "loki.grafana.com/certRotationRequiredAt"

// AnnotateForRequiredCertRotation sets the `loki.grafana.com/certRotationRequiredAt` annotation
// on the named LokiStack to the current UTC time in RFC3339 format. Callers use it when any of
// the managed client/serving/ca certificates expired. A LokiStack that cannot be found is not
// an error: nothing is updated and nil is returned.
func AnnotateForRequiredCertRotation(ctx context.Context, k k8s.Client, name, namespace string) error {
	key := client.ObjectKey{Name: name, Namespace: namespace}

	var stack lokiv1.LokiStack
	err := k.Get(ctx, key, &stack)
	switch {
	case err == nil:
		// Found, continue with the annotation below.
	case apierrors.IsNotFound(err):
		// Do nothing
		return nil
	default:
		return kverrors.Wrap(err, "failed to get lokistack", "key", key)
	}

	// Work on a copy so the fetched object stays untouched.
	updated := stack.DeepCopy()
	if updated.Annotations == nil {
		updated.Annotations = map[string]string{}
	}
	updated.Annotations[certRotationRequiredAtKey] = time.Now().UTC().Format(time.RFC3339)

	if err := k.Update(ctx, updated); err != nil {
		msg := fmt.Sprintf("failed to update lokistack `%s` annotation", certRotationRequiredAtKey)
		return kverrors.Wrap(err, msg, "key", key)
	}

	return nil
}
Loading