From d6eefd031d458bbe295317047eac45aaf3830007 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Fri, 16 Jul 2021 12:31:44 -0400 Subject: [PATCH 1/9] deploy daemonsets for logs --- docs/operator/getting-started.md | 3 + docs/operator/upgrade-guide.md | 61 ++++ docs/upgrade-guide/_index.md | 4 +- .../apis/monitoring/v1alpha1/types_logs.go | 2 - pkg/operator/clientutil/clientutil.go | 39 +- pkg/operator/config/config.go | 28 ++ pkg/operator/deployment_builder.go | 94 ++++- pkg/operator/operator.go | 11 +- pkg/operator/operator_test.go | 47 +++ pkg/operator/reconciler.go | 141 +------- pkg/operator/reconciler_logs.go | 65 ++++ pkg/operator/reconciler_metrics.go | 211 +++++++++++ pkg/operator/resources_logs.go | 340 ++++++++++++++++++ .../{resources.go => resources_metrics.go} | 8 +- pkg/operator/secondary_resource.go | 4 + ...ml => monitoring.grafana.com_podlogs.yaml} | 6 +- 16 files changed, 913 insertions(+), 151 deletions(-) create mode 100644 docs/operator/upgrade-guide.md create mode 100644 pkg/operator/operator_test.go create mode 100644 pkg/operator/reconciler_logs.go create mode 100644 pkg/operator/reconciler_metrics.go create mode 100644 pkg/operator/resources_logs.go rename pkg/operator/{resources.go => resources_metrics.go} (98%) rename production/operator/crds/{monitoring.grafana.com_pod-logs.yaml => monitoring.grafana.com_podlogs.yaml} (99%) diff --git a/docs/operator/getting-started.md b/docs/operator/getting-started.md index 503ad0d7fdbf..4787a2a5ac71 100644 --- a/docs/operator/getting-started.md +++ b/docs/operator/getting-started.md @@ -82,6 +82,8 @@ rules: resources: - grafana-agents - prometheus-instances + - logs-instances + - podlogs verbs: [get, list, watch] - apiGroups: [monitoring.coreos.com] resources: @@ -101,6 +103,7 @@ rules: - apiGroups: ["apps"] resources: - statefulsets + - daemonsets verbs: [get, list, watch, create, update, patch, delete] --- diff --git a/docs/operator/upgrade-guide.md b/docs/operator/upgrade-guide.md new file mode 100644 index 000000000000..edbb64e8533e --- /dev/null +++ b/docs/operator/upgrade-guide.md @@ -0,0 +1,61 @@ ++++ +title = "Upgrade guide" +weight = 200 ++++ + +# Upgrade guide + +This guide describes all breaking changes that have happened in prior releases +and how to migrate to newer versions of the Grafana Agent Operator. For +upgrading the Grafana Agent, please refer to its +[upgrade guide]({{< relref "../upgrade-guide" >}}) instead. + +## Unreleased + +These changes will come in a future version. + +### RBAC additions for logging support + +Now that the Grafana Agent Operator supports logs, the RBAC rules used by +the operator must be extended to LogsInstances, PodLogs, and DaemonSets. + +Example new ClusterRole for the Operator to use: + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: grafana-agent-operator +rules: +- apiGroups: [monitoring.grafana.com] + resources: + - grafana-agents + - prometheus-instances + - logs-instances + - podlogs + verbs: [get, list, watch] +- apiGroups: [monitoring.coreos.com] + resources: + - podmonitors + - probes + - servicemonitors + verbs: [get, list, watch] +- apiGroups: [""] + resources: + - namespaces + verbs: [get, list, watch] +- apiGroups: [""] + resources: + - secrets + - services + verbs: [get, list, watch, create, update, patch, delete] +- apiGroups: ["apps"] + resources: + - statefulsets + - daemonsets + verbs: [get, list, watch, create, update, patch, delete] +``` + +These RBAC permissions do not need to be given to the GrafanaAgent resource +itself, just the operator. The RBAC permissions recommended for the GrafanaAgent +resource for metrics already cover logging support. diff --git a/docs/upgrade-guide/_index.md b/docs/upgrade-guide/_index.md index 423bd67b6031..011ab0157cbb 100644 --- a/docs/upgrade-guide/_index.md +++ b/docs/upgrade-guide/_index.md @@ -6,7 +6,9 @@ weight = 200 # Upgrade guide This guide describes all breaking changes that have happened in prior -releases and how to migrate to newer versions. +releases and how to migrate to newer versions. For upgrading the +Grafana Agent Operator, please refer to its +[upgrade guide]({{< relref "./operator/upgrade-guide" >}}) instead. ## Unreleased diff --git a/pkg/operator/apis/monitoring/v1alpha1/types_logs.go b/pkg/operator/apis/monitoring/v1alpha1/types_logs.go index 6dfb7b766297..a93de26e3fec 100644 --- a/pkg/operator/apis/monitoring/v1alpha1/types_logs.go +++ b/pkg/operator/apis/monitoring/v1alpha1/types_logs.go @@ -153,8 +153,6 @@ type LogsInstanceList struct { } // +kubebuilder:object:root=true -// +kubebuilder:resource:path="pod-logs" -// +kubebuilder:resource:singular="pod-logs" // +kubebuilder:resource:categories="agent-operator" // PodLogs defines how to collect logs for a pod. diff --git a/pkg/operator/clientutil/clientutil.go b/pkg/operator/clientutil/clientutil.go index 4e2abe9aa280..abeb9ce9f0e6 100644 --- a/pkg/operator/clientutil/clientutil.go +++ b/pkg/operator/clientutil/clientutil.go @@ -114,7 +114,7 @@ func CreateOrUpdateStatefulSet(ctx context.Context, c client.Client, ss *apps_v1 } err = c.Create(ctx, ss) if err != nil { - return fmt.Errorf("failed to update statefulset: statefulset: %w", err) + return fmt.Errorf("failed to update statefulset: creating new statefulset: %w", err) } } else if err != nil { return fmt.Errorf("failed to update statefulset: %w", err) @@ -124,6 +124,43 @@ func CreateOrUpdateStatefulSet(ctx context.Context, c client.Client, ss *apps_v1 return nil } +// CreateOrUpdateDaemonSet applies the given DaemonSet against the client. +func CreateOrUpdateDaemonSet(ctx context.Context, c client.Client, ss *apps_v1.DaemonSet) error { + var exist apps_v1.DaemonSet + err := c.Get(ctx, client.ObjectKeyFromObject(ss), &exist) + if err != nil && !k8s_errors.IsNotFound(err) { + return fmt.Errorf("failed to retrieve existing daemonset: %w", err) + } + + if k8s_errors.IsNotFound(err) { + err := c.Create(ctx, ss) + if err != nil { + return fmt.Errorf("failed to create daemonset: %w", err) + } + } else { + ss.ResourceVersion = exist.ResourceVersion + ss.SetOwnerReferences(mergeOwnerReferences(ss.GetOwnerReferences(), exist.GetOwnerReferences())) + ss.SetLabels(mergeMaps(ss.Labels, exist.Labels)) + ss.SetAnnotations(mergeMaps(ss.Annotations, exist.Annotations)) + + err := c.Update(ctx, ss) + if k8s_errors.IsNotAcceptable(err) { + err = c.Delete(ctx, ss) + if err != nil { + return fmt.Errorf("failed to update daemonset: deleting old daemonset: %w", err) + } + err = c.Create(ctx, ss) + if err != nil { + return fmt.Errorf("failed to update daemonset: creating new deamonset: %w", err) + } + } else if err != nil { + return fmt.Errorf("failed to update daemonset: %w", err) + } + } + + return nil +} + func mergeOwnerReferences(new, old []meta_v1.OwnerReference) []meta_v1.OwnerReference { existing := make(map[types.UID]bool) for _, ref := range old { diff --git a/pkg/operator/config/config.go b/pkg/operator/config/config.go index 1f58c9c4cd0f..e47936755e64 100644 --- a/pkg/operator/config/config.go +++ b/pkg/operator/config/config.go @@ -29,6 +29,18 @@ const ( LogsType ) +// String returns the string form of Type. +func (t Type) String() string { + switch t { + case MetricsType: + return "metrics" + case LogsType: + return "logs" + default: + return fmt.Sprintf("unknown (%d)", int(t)) + } +} + //go:embed templates/* var templates embed.FS @@ -72,9 +84,25 @@ func (d *Deployment) DeepCopy() *Deployment { }) } + l := make([]LogInstance, 0, len(d.Logs)) + for _, i := range d.Logs { + var ( + inst = i.Instance.DeepCopy() + pLogs = make([]*grafana.PodLogs, 0, len(i.PodLogs)) + ) + for _, pLog := range i.PodLogs { + pLogs = append(pLogs, pLog.DeepCopy()) + } + l = append(l, LogInstance{ + Instance: inst, + PodLogs: pLogs, + }) + } + return &Deployment{ Agent: d.Agent.DeepCopy(), Prometheis: p, + Logs: l, } } diff --git a/pkg/operator/deployment_builder.go b/pkg/operator/deployment_builder.go index 7d4903666860..94f285e5df64 100644 --- a/pkg/operator/deployment_builder.go +++ b/pkg/operator/deployment_builder.go @@ -30,13 +30,13 @@ type deploymentBuilder struct { } func (b *deploymentBuilder) Build(ctx context.Context, l log.Logger) (config.Deployment, error) { - instances, err := b.getPrometheusInstances(ctx) + rootMetricInstances, err := b.getPrometheusInstances(ctx) if err != nil { return config.Deployment{}, err } - promInstances := make([]config.PrometheusInstance, 0, len(instances)) + metricInstances := make([]config.PrometheusInstance, 0, len(rootMetricInstances)) - for _, inst := range instances { + for _, inst := range rootMetricInstances { sMons, err := b.getServiceMonitors(ctx, l, inst) if err != nil { return config.Deployment{}, fmt.Errorf("unable to fetch ServiceMonitors: %w", err) @@ -50,7 +50,7 @@ func (b *deploymentBuilder) Build(ctx context.Context, l log.Logger) (config.Dep return config.Deployment{}, fmt.Errorf("unable to fetch Probes: %w", err) } - promInstances = append(promInstances, config.PrometheusInstance{ + metricInstances = append(metricInstances, config.PrometheusInstance{ Instance: inst, ServiceMonitors: sMons, PodMonitors: pMons, @@ -58,9 +58,28 @@ func (b *deploymentBuilder) Build(ctx context.Context, l log.Logger) (config.Dep }) } + rootLogsInstances, err := b.getLogsInstances(ctx) + if err != nil { + return config.Deployment{}, err + } + logsInstances := make([]config.LogInstance, 0, len(rootLogsInstances)) + + for _, inst := range rootLogsInstances { + podLogs, err := b.getPodLogs(ctx, inst) + if err != nil { + return config.Deployment{}, fmt.Errorf("unable to fetch PodLogs: %w", err) + } + + logsInstances = append(logsInstances, config.LogInstance{ + Instance: inst, + PodLogs: podLogs, + }) + } + return config.Deployment{ Agent: b.Agent, - Prometheis: promInstances, + Prometheis: metricInstances, + Logs: logsInstances, }, nil } @@ -311,3 +330,68 @@ func (b *deploymentBuilder) getProbes( } return items, nil } + +func (b *deploymentBuilder) getLogsInstances(ctx context.Context) ([]*grafana_v1alpha1.LogsInstance, error) { + sel, err := b.getResourceSelector( + b.Agent.Namespace, + b.Agent.Spec.Logs.InstanceNamespaceSelector, + b.Agent.Spec.Logs.InstanceSelector, + ) + if err != nil { + return nil, fmt.Errorf("unable to build logs resource selector: %w", err) + } + b.ResourceSelectors[resourceLogsInstance] = append(b.ResourceSelectors[resourceLogsInstance], sel) + + var ( + list grafana_v1alpha1.LogsInstanceList + namespace = namespaceFromSelector(sel) + listOptions = &client.ListOptions{LabelSelector: sel.Labels, Namespace: namespace} + ) + if err := b.List(ctx, &list, listOptions); err != nil { + return nil, err + } + + items := make([]*grafana_v1alpha1.LogsInstance, 0, len(list.Items)) + for _, item := range list.Items { + if match, err := b.matchNamespace(ctx, &item.ObjectMeta, sel); match { + items = append(items, item) + } else if err != nil { + return nil, fmt.Errorf("failed getting namespace: %w", err) + } + } + return items, nil +} + +func (b *deploymentBuilder) getPodLogs( + ctx context.Context, + inst *grafana_v1alpha1.LogsInstance, +) ([]*grafana_v1alpha1.PodLogs, error) { + sel, err := b.getResourceSelector( + inst.Namespace, + inst.Spec.PodLogsNamespaceSelector, + inst.Spec.PodLogsSelector, + ) + if err != nil { + return nil, fmt.Errorf("unable to build service monitor resource selector: %w", err) + } + b.ResourceSelectors[resourcePodLogs] = append(b.ResourceSelectors[resourcePodLogs], sel) + + var ( + list grafana_v1alpha1.PodLogsList + namespace = namespaceFromSelector(sel) + listOptions = &client.ListOptions{LabelSelector: sel.Labels, Namespace: namespace} + ) + if err := b.List(ctx, &list, listOptions); err != nil { + return nil, err + } + + items := make([]*grafana_v1alpha1.PodLogs, 0, len(list.Items)) + for _, item := range list.Items { + if match, err := b.matchNamespace(ctx, &item.ObjectMeta, sel); match { + items = append(items, item) + } else if err != nil { + return nil, fmt.Errorf("failed getting namespace: %w", err) + } + } + return items, nil +} diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index fb0de592f9f3..33b380539e68 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -119,14 +119,17 @@ func New(l log.Logger, c *Config, m manager.Manager) error { err := controller.NewControllerManagedBy(m). For(applyGVK(&grafana_v1alpha1.GrafanaAgent{}), builder.WithPredicates(agentPredicates...)). - Owns(applyGVK(&core_v1.Service{})). - Owns(applyGVK(&core_v1.Secret{})). Owns(applyGVK(&apps_v1.StatefulSet{})). + Owns(applyGVK(&apps_v1.DaemonSet{})). + Owns(applyGVK(&core_v1.Secret{})). + Owns(applyGVK(&core_v1.Service{})). + Watches(watchType(&core_v1.Secret{}), events[resourceSecret]). + Watches(watchType(&grafana_v1alpha1.LogsInstance{}), events[resourceLogsInstance]). + Watches(watchType(&grafana_v1alpha1.PodLogs{}), events[resourcePodLogs]). Watches(watchType(&grafana_v1alpha1.PrometheusInstance{}), events[resourcePromInstance]). - Watches(watchType(&promop_v1.ServiceMonitor{}), events[resourceServiceMonitor]). Watches(watchType(&promop_v1.PodMonitor{}), events[resourcePodMonitor]). Watches(watchType(&promop_v1.Probe{}), events[resourceProbe]). - Watches(watchType(&core_v1.Secret{}), events[resourceSecret]). + Watches(watchType(&promop_v1.ServiceMonitor{}), events[resourceServiceMonitor]). Complete(&reconciler{ Client: m.GetClient(), scheme: m.GetScheme(), diff --git a/pkg/operator/operator_test.go b/pkg/operator/operator_test.go new file mode 100644 index 000000000000..7362879e22c9 --- /dev/null +++ b/pkg/operator/operator_test.go @@ -0,0 +1,47 @@ +package operator + +import ( + "fmt" + "testing" + + gragent "github.com/grafana/agent/pkg/operator/apis/monitoring/v1alpha1" + "k8s.io/apimachinery/pkg/api/meta" + meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +func TestOperator(t *testing.T) { + instList := &gragent.GrafanaAgentList{ + TypeMeta: meta_v1.TypeMeta{ + Kind: "GrafanaAgentList", + APIVersion: gragent.SchemeGroupVersion.Identifier(), + }, + ListMeta: meta_v1.ListMeta{}, + Items: []*gragent.GrafanaAgent{ + {ObjectMeta: meta_v1.ObjectMeta{Name: "a"}}, + {ObjectMeta: meta_v1.ObjectMeta{Name: "b"}}, + }, + } + _ = instList + + meta.EachListItem(instList, func(o runtime.Object) error { + fmt.Printf("%#v\n", o) + return nil + }) + + /* + data, err := runtime.DefaultUnstructuredConverter.ToUnstructured(instList) + require.NoError(t, err) + us := unstructured.Unstructured{Object: data} + + _ = us.EachListItem(func(o runtime.Object) error { + us := o.(*unstructured.Unstructured) + + var a gragent.GrafanaAgent + runtime.DefaultUnstructuredConverter.FromUnstructured(us.Object, &a) + fmt.Printf("%#v\n", a) + return nil + }) + */ + +} diff --git a/pkg/operator/reconciler.go b/pkg/operator/reconciler.go index 914f61ef22f7..88aa0b928ea1 100644 --- a/pkg/operator/reconciler.go +++ b/pkg/operator/reconciler.go @@ -2,23 +2,18 @@ package operator import ( "context" - "errors" "fmt" - "os" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" - "github.com/google/go-jsonnet" grafana_v1alpha1 "github.com/grafana/agent/pkg/operator/apis/monitoring/v1alpha1" "github.com/grafana/agent/pkg/operator/assets" "github.com/grafana/agent/pkg/operator/clientutil" "github.com/grafana/agent/pkg/operator/config" "github.com/grafana/agent/pkg/operator/logutil" - apps_v1 "k8s.io/api/apps/v1" core_v1 "k8s.io/api/core/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" controller "sigs.k8s.io/controller-runtime" @@ -82,10 +77,17 @@ func (r *reconciler) Reconcile(ctx context.Context, req controller.Request) (con type reconcileFunc func(context.Context, log.Logger, config.Deployment, assets.SecretStore) error actors := []reconcileFunc{ - r.createConfigurationSecret, + // Operator-wide resources r.createSecrets, - r.createGoverningService, - r.createStatefulSets, + + // Metrics resources (may be a no-op if no metrics configured) + r.createMetricsConfigurationSecret, + r.createMetricsGoverningService, + r.createMetricsStatefulSets, + + // Logs resources (may be a no-op if no logs configured) + r.createLogsConfigurationSecret, + r.createLogsDaemonSet, } for _, actor := range actors { err := actor(ctx, l, deployment, secrets) @@ -143,53 +145,6 @@ func (r *reconciler) fillStore(ctx context.Context, refs []config.AssetReference return nil } -// createConfigurationSecret creates the Grafana Agent configuration and stores -// it into a secret. -func (r *reconciler) createConfigurationSecret( - ctx context.Context, - l log.Logger, - d config.Deployment, - s assets.SecretStore, -) error { - - rawConfig, err := d.BuildConfig(s, config.MetricsType) - - var jsonnetError jsonnet.RuntimeError - if errors.As(err, &jsonnetError) { - // Dump Jsonnet errors to the console to retain newlines and make them - // easier to digest. - fmt.Fprintf(os.Stderr, "%s", jsonnetError.Error()) - } - if err != nil { - return fmt.Errorf("unable to build config: %w", err) - } - - blockOwnerDeletion := true - - secret := core_v1.Secret{ - ObjectMeta: v1.ObjectMeta{ - Namespace: d.Agent.Namespace, - Name: fmt.Sprintf("%s-config", d.Agent.Name), - Labels: r.config.Labels.Merge(managedByOperatorLabels), - OwnerReferences: []v1.OwnerReference{{ - APIVersion: d.Agent.APIVersion, - BlockOwnerDeletion: &blockOwnerDeletion, - Kind: d.Agent.Kind, - Name: d.Agent.Name, - UID: d.Agent.UID, - }}, - }, - Data: map[string][]byte{"agent.yml": []byte(rawConfig)}, - } - - level.Info(l).Log("msg", "reconciling secret", "secret", secret.Name) - err = clientutil.CreateOrUpdateSecret(ctx, r.Client, &secret) - if err != nil { - return fmt.Errorf("failed to reconcile secret: %w", err) - } - return nil -} - // createSecrets creates secrets from the secret store. func (r *reconciler) createSecrets( ctx context.Context, @@ -227,79 +182,3 @@ func (r *reconciler) createSecrets( } return nil } - -// createGoverningService creates the service that governs the (eventual) -// StatefulSet. It must be created before the StatefulSet. -func (r *reconciler) createGoverningService( - ctx context.Context, - l log.Logger, - d config.Deployment, - s assets.SecretStore, -) error { - svc := generateStatefulSetService(r.config, d) - level.Info(l).Log("msg", "reconciling statefulset service", "service", svc.Name) - err := clientutil.CreateOrUpdateService(ctx, r.Client, svc) - if err != nil { - return fmt.Errorf("failed to reconcile statefulset governing service: %w", err) - } - return nil -} - -// createStatefulSets creates a set of Grafana Agent StatefulSets, one per shard. -func (r *reconciler) createStatefulSets( - ctx context.Context, - l log.Logger, - d config.Deployment, - s assets.SecretStore, -) error { - - shards := minShards - if reqShards := d.Agent.Spec.Prometheus.Shards; reqShards != nil && *reqShards > 1 { - shards = *reqShards - } - - // Keep track of generated stateful sets so we can delete ones that should - // no longer exist. - generated := make(map[string]struct{}) - - for shard := int32(0); shard < shards; shard++ { - name := d.Agent.Name - if shard > 0 { - name = fmt.Sprintf("%s-shard-%d", name, shard) - } - - ss, err := generateStatefulSet(r.config, name, d, shard) - if err != nil { - return fmt.Errorf("failed to generate statefulset for shard: %w", err) - } - - level.Info(l).Log("msg", "reconciling statefulset", "statefulset", ss.Name) - err = clientutil.CreateOrUpdateStatefulSet(ctx, r.Client, ss) - if err != nil { - return fmt.Errorf("failed to reconcile statefulset for shard: %w", err) - } - generated[ss.Name] = struct{}{} - } - - // Clean up statefulsets that should no longer exist. - var statefulSets apps_v1.StatefulSetList - err := r.List(ctx, &statefulSets, &client.ListOptions{ - LabelSelector: labels.SelectorFromSet(labels.Set{ - agentNameLabelName: d.Agent.Name, - }), - }) - if err != nil { - return fmt.Errorf("failed to list statefulsets: %w", err) - } - for _, ss := range statefulSets.Items { - if _, keep := generated[ss.Name]; keep { - continue - } - level.Info(l).Log("msg", "deleting stale statefulset", "name", ss.Name) - if err := r.Client.Delete(ctx, &ss); err != nil { - return fmt.Errorf("failed to delete stale statefulset %s: %w", ss.Name, err) - } - } - - return nil -} diff --git a/pkg/operator/reconciler_logs.go b/pkg/operator/reconciler_logs.go new file mode 100644 index 000000000000..869ac4d24bc6 --- /dev/null +++ b/pkg/operator/reconciler_logs.go @@ -0,0 +1,65 @@ +package operator + +import ( + "context" + "fmt" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/grafana/agent/pkg/operator/assets" + "github.com/grafana/agent/pkg/operator/clientutil" + "github.com/grafana/agent/pkg/operator/config" + apps_v1 "k8s.io/api/apps/v1" + k8s_errors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" +) + +// createLogsConfigurationSecret creates the Grafana Agent logs configuration +// and stores it into a secret. +func (r *reconciler) createLogsConfigurationSecret( + ctx context.Context, + l log.Logger, + d config.Deployment, + s assets.SecretStore, +) error { + return r.createTelemetryConfigurationSecret(ctx, l, d, s, config.LogsType) +} + +// createLogsDaemonSet creates a DaemonSet for logs. +func (r *reconciler) createLogsDaemonSet( + ctx context.Context, + l log.Logger, + d config.Deployment, + s assets.SecretStore, +) error { + name := fmt.Sprintf("%s-logs", d.Agent.Name) + ds, err := generateLogsDaemonSet(r.config, name, d) + if err != nil { + return fmt.Errorf("failed to generate DaemonSet: %w", err) + } + key := types.NamespacedName{Namespace: ds.Namespace, Name: ds.Name} + + if len(d.Logs) == 0 { + + var ds apps_v1.DaemonSet + err := r.Client.Get(ctx, key, &ds) + if k8s_errors.IsNotFound(err) { + return nil + } else if err != nil { + return fmt.Errorf("failed to find stale DaemonSet %s: %w", key, err) + } + + err = r.Client.Delete(ctx, &ds) + if err != nil { + return fmt.Errorf("failed to delete stale DaemonSet %s: %w", key, err) + } + return nil + } + + level.Info(l).Log("msg", "reconciling logs daemonset", "ds", key) + err = clientutil.CreateOrUpdateDaemonSet(ctx, r.Client, ds) + if err != nil { + return fmt.Errorf("failed to reconcile statefulset governing service: %w", err) + } + return nil +} diff --git a/pkg/operator/reconciler_metrics.go b/pkg/operator/reconciler_metrics.go new file mode 100644 index 000000000000..e359866d040f --- /dev/null +++ b/pkg/operator/reconciler_metrics.go @@ -0,0 +1,211 @@ +package operator + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/google/go-jsonnet" + "github.com/grafana/agent/pkg/operator/assets" + "github.com/grafana/agent/pkg/operator/clientutil" + "github.com/grafana/agent/pkg/operator/config" + apps_v1 "k8s.io/api/apps/v1" + core_v1 "k8s.io/api/core/v1" + k8s_errors "k8s.io/apimachinery/pkg/api/errors" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// createMetricsConfigurationSecret creates the Grafana Agent metrics configuration and stores +// it into a secret. +func (r *reconciler) createMetricsConfigurationSecret( + ctx context.Context, + l log.Logger, + d config.Deployment, + s assets.SecretStore, +) error { + return r.createTelemetryConfigurationSecret(ctx, l, d, s, config.MetricsType) +} + +func (r *reconciler) createTelemetryConfigurationSecret( + ctx context.Context, + l log.Logger, + d config.Deployment, + s assets.SecretStore, + ty config.Type, +) error { + + var shouldCreate bool + key := types.NamespacedName{Namespace: d.Agent.Namespace} + + switch ty { + case config.MetricsType: + key.Name = fmt.Sprintf("%s-config", d.Agent.Name) + shouldCreate = len(d.Prometheis) > 0 + case config.LogsType: + key.Name = fmt.Sprintf("%s-logs-config", d.Agent.Name) + shouldCreate = len(d.Logs) > 0 + default: + return fmt.Errorf("unknown telemetry type %s", ty) + } + + // Delete the old Secret if one exists and we have nothing to create. + if !shouldCreate { + var secret core_v1.Secret + err := r.Client.Get(ctx, key, &secret) + if k8s_errors.IsNotFound(err) { + return nil + } else if err != nil { + return fmt.Errorf("failed to find stale secret %s: %w", key, err) + } + + err = r.Client.Delete(ctx, &secret) + if err != nil { + return fmt.Errorf("failed to delete stale secret %s: %w", key, err) + } + return nil + } + + rawConfig, err := d.BuildConfig(s, ty) + + var jsonnetError jsonnet.RuntimeError + if errors.As(err, &jsonnetError) { + // Dump Jsonnet errors to the console to retain newlines and make them + // easier to digest. + fmt.Fprintf(os.Stderr, "%s", jsonnetError.Error()) + } + if err != nil { + return fmt.Errorf("unable to build config: %w", err) + } + + blockOwnerDeletion := true + + secret := core_v1.Secret{ + ObjectMeta: v1.ObjectMeta{ + Namespace: key.Namespace, + Name: key.Name, + Labels: r.config.Labels.Merge(managedByOperatorLabels), + OwnerReferences: []v1.OwnerReference{{ + APIVersion: d.Agent.APIVersion, + BlockOwnerDeletion: &blockOwnerDeletion, + Kind: d.Agent.Kind, + Name: d.Agent.Name, + UID: d.Agent.UID, + }}, + }, + Data: map[string][]byte{"agent.yml": []byte(rawConfig)}, + } + + level.Info(l).Log("msg", "reconciling secret", "secret", secret.Name) + err = clientutil.CreateOrUpdateSecret(ctx, r.Client, &secret) + if err != nil { + return fmt.Errorf("failed to reconcile secret: %w", err) + } + return nil +} + +// createMetricsGoverningService creates the service that governs the (eventual) +// StatefulSet. It must be created before the StatefulSet. +func (r *reconciler) createMetricsGoverningService( + ctx context.Context, + l log.Logger, + d config.Deployment, + s assets.SecretStore, +) error { + svc := generateMetricsStatefulSetService(r.config, d) + + // Delete the old Secret if one exists and we have no prometheus instances. + if len(d.Prometheis) == 0 { + key := types.NamespacedName{Namespace: svc.Namespace, Name: svc.Name} + + var service core_v1.Service + err := r.Client.Get(ctx, key, &service) + if k8s_errors.IsNotFound(err) { + return nil + } else if err != nil { + return fmt.Errorf("failed to find stale Service %s: %w", key, err) + } + + err = r.Client.Delete(ctx, &service) + if err != nil { + return fmt.Errorf("failed to delete stale Service %s: %w", key, err) + } + return nil + } + + level.Info(l).Log("msg", "reconciling statefulset service", "service", svc.Name) + err := clientutil.CreateOrUpdateService(ctx, r.Client, svc) + if err != nil { + return fmt.Errorf("failed to reconcile statefulset governing service: %w", err) + } + return nil +} + +// createMetricsStatefulSets creates a set of Grafana Agent StatefulSets, one per shard. +func (r *reconciler) createMetricsStatefulSets( + ctx context.Context, + l log.Logger, + d config.Deployment, + s assets.SecretStore, +) error { + + shards := minShards + if reqShards := d.Agent.Spec.Prometheus.Shards; reqShards != nil && *reqShards > 1 { + shards = *reqShards + } + + // Keep track of generated stateful sets so we can delete ones that should + // no longer exist. + generated := make(map[string]struct{}) + + for shard := int32(0); shard < shards; shard++ { + // Don't generate anything if there weren't any instances. + if len(d.Prometheis) == 0 { + continue + } + + name := d.Agent.Name + if shard > 0 { + name = fmt.Sprintf("%s-shard-%d", name, shard) + } + + ss, err := generateMetricsStatefulSet(r.config, name, d, shard) + if err != nil { + return fmt.Errorf("failed to generate statefulset for shard: %w", err) + } + + level.Info(l).Log("msg", "reconciling statefulset", "statefulset", ss.Name) + err = clientutil.CreateOrUpdateStatefulSet(ctx, r.Client, ss) + if err != nil { + return fmt.Errorf("failed to reconcile statefulset for shard: %w", err) + } + generated[ss.Name] = struct{}{} + } + + // Clean up statefulsets that should no longer exist. + var statefulSets apps_v1.StatefulSetList + err := r.List(ctx, &statefulSets, &client.ListOptions{ + LabelSelector: labels.SelectorFromSet(labels.Set{ + agentNameLabelName: d.Agent.Name, + }), + }) + if err != nil { + return fmt.Errorf("failed to list statefulsets: %w", err) + } + for _, ss := range statefulSets.Items { + if _, keep := generated[ss.Name]; keep { + continue + } + level.Info(l).Log("msg", "deleting stale statefulset", "name", ss.Name) + if err := r.Client.Delete(ctx, &ss); err != nil { + return fmt.Errorf("failed to delete stale statefulset %s: %w", ss.Name, err) + } + } + + return nil +} diff --git a/pkg/operator/resources_logs.go b/pkg/operator/resources_logs.go new file mode 100644 index 000000000000..2df31b422a79 --- /dev/null +++ b/pkg/operator/resources_logs.go @@ -0,0 +1,340 @@ +package operator + +import ( + "fmt" + "strings" + + "github.com/grafana/agent/pkg/build" + "github.com/grafana/agent/pkg/operator/clientutil" + "github.com/grafana/agent/pkg/operator/config" + apps_v1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +func generateLogsDaemonSet( + cfg *Config, + name string, + d config.Deployment, +) (*apps_v1.DaemonSet, error) { + d = *d.DeepCopy() + + if d.Agent.Spec.PortName == "" { + d.Agent.Spec.PortName = defaultPortName + } + + spec, err := generateLogsDaemonSetSpec(cfg, name, d) + if err != nil { + return nil, err + } + + // Don't transfer any kubectl annotations to the DaemonSet so it doesn't get + // pruned by kubectl. + annotations := make(map[string]string) + for k, v := range d.Agent.ObjectMeta.Annotations { + if !strings.HasPrefix(k, "kubectl.kubernetes.io/") { + annotations[k] = v + } + } + + labels := make(map[string]string) + for k, v := range spec.Template.Labels { + labels[k] = v + } + labels[agentNameLabelName] = d.Agent.Name + + boolTrue := true + ds := &apps_v1.DaemonSet{ + ObjectMeta: meta_v1.ObjectMeta{ + Name: name, + Namespace: d.Agent.Namespace, + Labels: labels, + Annotations: annotations, + OwnerReferences: []meta_v1.OwnerReference{{ + APIVersion: d.Agent.APIVersion, + Kind: d.Agent.Kind, + BlockOwnerDeletion: &boolTrue, + Controller: &boolTrue, + Name: d.Agent.Name, + UID: d.Agent.UID, + }}, + }, + Spec: *spec, + } + + // TODO(rfratto): Prometheus Operator has an input hash annotation added here, + // which combines the hash of the DaemonSet, config to the operator, rule + // config map names (unused here), and the previous DaemonSet (if any). + // + // This is used to skip re-applying an unchanged Daemonset. Do we need this? + + if len(d.Agent.Spec.ImagePullSecrets) > 0 { + ds.Spec.Template.Spec.ImagePullSecrets = d.Agent.Spec.ImagePullSecrets + } + + return ds, nil +} + +func generateLogsDaemonSetSpec( + cfg *Config, + name string, + d config.Deployment, +) (*apps_v1.DaemonSetSpec, error) { + + imagePath := fmt.Sprintf("%s:%s", DefaultAgentBaseImage, d.Agent.Spec.Version) + if d.Agent.Spec.Image != nil && *d.Agent.Spec.Image != "" { + imagePath = *d.Agent.Spec.Image + } + + agentArgs := []string{ + "-config.file=/var/lib/grafana-agent/config/agent.yml", + "-config.expand-env=true", + "-reload-port=8081", + } + + // NOTE(rfratto): the Prometheus Operator supports a ListenLocal to prevent a + // service from being created. Given the intent is that Agents can connect to + // each other, ListenLocal isn't currently supported and we always create a port. + ports := []v1.ContainerPort{{ + Name: d.Agent.Spec.PortName, + ContainerPort: 8080, + Protocol: v1.ProtocolTCP, + }} + + volumes := []v1.Volume{ + { + Name: "config", + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: fmt.Sprintf("%s-config", name), + }, + }, + }, + { + // We need a separate volume for storing the rendered config with + // environment variables replaced. While the Agent supports environment + // variable substitution, the value for __replica__ can only be + // determined at runtime. We use a dedicated container for both config + // reloading and rendering. + Name: "config-out", + VolumeSource: v1.VolumeSource{ + EmptyDir: &v1.EmptyDirVolumeSource{}, + }, + }, + { + Name: "secrets", + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{ + SecretName: fmt.Sprintf("%s-secrets", d.Agent.Name), + }, + }, + }, + { + Name: "varlog", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "/var/log"}, + }, + }, + { + // Needed for docker. Kubernetes will symlink to this directory. For CRI + // platforms, this doesn't change anything. + Name: "dockerlogs", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "/var/lib/docker/containers"}, + }, + }, + { + // Needed for storing positions for recovery. + Name: "data", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{Path: "/var/lib/grafana-agent/data"}, + }, + }, + } + + volumeMounts := []v1.VolumeMount{{ + Name: "config", + ReadOnly: true, + MountPath: "/var/lib/grafana-agent/config-in", + }, { + Name: "config-out", + MountPath: "/var/lib/grafana-agent/config", + }, { + Name: "secrets", + ReadOnly: true, + MountPath: "/var/lib/grafana-agent/secrets", + }, { + Name: "varlog", + ReadOnly: true, + MountPath: "/var/log", + }, { + Name: "dockerlogs", + ReadOnly: true, + MountPath: "/var/lib/docker/containers", + }, { + Name: "data", + MountPath: "/var/lib/grafana-agent/data", + }} + volumeMounts = append(volumeMounts, d.Agent.Spec.VolumeMounts...) + + for _, s := range d.Agent.Spec.Secrets { + volumes = append(volumes, v1.Volume{ + Name: clientutil.SanitizeVolumeName("secret-" + s), + VolumeSource: v1.VolumeSource{ + Secret: &v1.SecretVolumeSource{SecretName: s}, + }, + }) + volumeMounts = append(volumeMounts, v1.VolumeMount{ + Name: clientutil.SanitizeVolumeName("secret-" + s), + ReadOnly: true, + MountPath: "/var/lib/grafana-agent/secrets", + }) + } + for _, c := range d.Agent.Spec.ConfigMaps { + volumes = append(volumes, v1.Volume{ + Name: clientutil.SanitizeVolumeName("configmap-" + c), + VolumeSource: v1.VolumeSource{ + ConfigMap: &v1.ConfigMapVolumeSource{ + LocalObjectReference: v1.LocalObjectReference{Name: c}, + }, + }, + }) + volumeMounts = append(volumeMounts, v1.VolumeMount{ + Name: clientutil.SanitizeVolumeName("configmap-" + c), + ReadOnly: true, + MountPath: "/var/lib/grafana-agent/configmaps", + }) + } + + podAnnotations := map[string]string{} + podLabels := map[string]string{} + podSelectorLabels := map[string]string{ + "app.kubernetes.io/name": "grafana-agent", + "app.kubernetes.io/version": build.Version, + "app.kubernetes.io/managed-by": "grafana-agent-operator", + "app.kubernetes.io/instance": d.Agent.Name, + "grafana-agent": d.Agent.Name, + agentNameLabelName: d.Agent.Name, + } + if d.Agent.Spec.PodMetadata != nil { + for k, v := range d.Agent.Spec.PodMetadata.Labels { + podLabels[k] = v + } + for k, v := range d.Agent.Spec.PodMetadata.Annotations { + podAnnotations[k] = v + } + } + for k, v := range podSelectorLabels { + podLabels[k] = v + } + + podAnnotations["kubectl.kubernetes.io/default-container"] = "grafana-agent" + + var ( + finalSelectorLabels = cfg.Labels.Merge(podSelectorLabels) + finalLabels = cfg.Labels.Merge(podLabels) + ) + + envVars := []v1.EnvVar{{ + Name: "POD_NAME", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{FieldPath: "metadata.name"}, + }, + }, { + Name: "HOSTNAME", + ValueFrom: &v1.EnvVarSource{ + FieldRef: &v1.ObjectFieldSelector{FieldPath: "spec.nodeName"}, + }, + }, { + // Not used anywhere for logs but passed to the config-reloader since it + // expects everything is coming from a StatefulSet. + Name: "SHARD", + Value: "0", + }} + + var ( + privileged bool = true + runAsUser int64 = 0 + + terminationGracePeriodSeconds = int64(4800) + ) + + operatorContainers := []v1.Container{ + { + Name: "config-reloader", + Image: "quay.io/prometheus-operator/prometheus-config-reloader:v0.47.0", + VolumeMounts: volumeMounts, + Env: envVars, + SecurityContext: &v1.SecurityContext{ + Privileged: &privileged, + RunAsUser: &runAsUser, + }, + Args: []string{ + "--config-file=/var/lib/grafana-agent/config-in/agent.yml", + "--config-envsubst-file=/var/lib/grafana-agent/config/agent.yml", + + "--watch-interval=1m", + "--statefulset-ordinal-from-envvar=SHARD", + + // Use specifically the reload-port for reloading, since the primary + // server can shut down in between reloads. + "--reload-url=http://127.0.0.1:8081/-/reload", + }, + }, + { + Name: "grafana-agent", + Image: imagePath, + Ports: ports, + Args: agentArgs, + VolumeMounts: volumeMounts, + Env: envVars, + ReadinessProbe: &v1.Probe{ + Handler: v1.Handler{ + HTTPGet: &v1.HTTPGetAction{ + Path: "/-/ready", + Port: intstr.FromString(d.Agent.Spec.PortName), + }, + }, + InitialDelaySeconds: 10, + TimeoutSeconds: probeTimeoutSeconds, + PeriodSeconds: 5, + }, + Resources: d.Agent.Spec.Resources, + TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError, + }, + } + + containers, err := clientutil.MergePatchContainers(operatorContainers, d.Agent.Spec.Containers) + if err != nil { + return nil, fmt.Errorf("failed to merge containers spec: %w", err) + } + + return &apps_v1.DaemonSetSpec{ + UpdateStrategy: apps_v1.DaemonSetUpdateStrategy{ + Type: apps_v1.RollingUpdateDaemonSetStrategyType, + }, + Selector: &meta_v1.LabelSelector{ + MatchLabels: finalSelectorLabels, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: meta_v1.ObjectMeta{ + Labels: finalLabels, + Annotations: podAnnotations, + }, + Spec: v1.PodSpec{ + Containers: containers, + InitContainers: d.Agent.Spec.InitContainers, + SecurityContext: d.Agent.Spec.SecurityContext, + ServiceAccountName: d.Agent.Spec.ServiceAccountName, + NodeSelector: d.Agent.Spec.NodeSelector, + PriorityClassName: d.Agent.Spec.PriorityClassName, + TerminationGracePeriodSeconds: &terminationGracePeriodSeconds, + Volumes: volumes, + Tolerations: d.Agent.Spec.Tolerations, + Affinity: d.Agent.Spec.Affinity, + TopologySpreadConstraints: d.Agent.Spec.TopologySpreadConstraints, + }, + }, + }, nil +} diff --git a/pkg/operator/resources.go b/pkg/operator/resources_metrics.go similarity index 98% rename from pkg/operator/resources.go rename to pkg/operator/resources_metrics.go index 42f49c71649a..0d557371e25b 100644 --- a/pkg/operator/resources.go +++ b/pkg/operator/resources_metrics.go @@ -32,7 +32,7 @@ var ( probeTimeoutSeconds int32 = 3 ) -func generateStatefulSetService(cfg *Config, d config.Deployment) *v1.Service { +func generateMetricsStatefulSetService(cfg *Config, d config.Deployment) *v1.Service { d = *d.DeepCopy() if d.Agent.Spec.PortName == "" { @@ -71,7 +71,7 @@ func generateStatefulSetService(cfg *Config, d config.Deployment) *v1.Service { } } -func generateStatefulSet( +func generateMetricsStatefulSet( cfg *Config, name string, d config.Deployment, @@ -99,7 +99,7 @@ func generateStatefulSet( d.Agent.Spec.Resources.Requests = v1.ResourceList{} } - spec, err := generateStatefulSetSpec(cfg, name, d, shard) + spec, err := generateMetricsStatefulSetSpec(cfg, name, d, shard) if err != nil { return nil, err } @@ -185,7 +185,7 @@ func generateStatefulSet( return ss, nil } -func generateStatefulSetSpec( +func generateMetricsStatefulSetSpec( cfg *Config, name string, d config.Deployment, diff --git a/pkg/operator/secondary_resource.go b/pkg/operator/secondary_resource.go index 89314c3aafe4..7b9a89b93cd2 100644 --- a/pkg/operator/secondary_resource.go +++ b/pkg/operator/secondary_resource.go @@ -18,6 +18,8 @@ const ( resourcePodMonitor resourceProbe resourceSecret + resourceLogsInstance + resourcePodLogs ) // secondaryResources is the list of valid secondaryResources. @@ -27,6 +29,8 @@ var secondaryResources = []secondaryResource{ resourcePodMonitor, resourceProbe, resourceSecret, + resourceLogsInstance, + resourcePodLogs, } // eventHandlers is a set of EnqueueRequestForSelector event handlers, one per diff --git a/production/operator/crds/monitoring.grafana.com_pod-logs.yaml b/production/operator/crds/monitoring.grafana.com_podlogs.yaml similarity index 99% rename from production/operator/crds/monitoring.grafana.com_pod-logs.yaml rename to production/operator/crds/monitoring.grafana.com_podlogs.yaml index e6e86c08837f..c40cb7ab6d2b 100644 --- a/production/operator/crds/monitoring.grafana.com_pod-logs.yaml +++ b/production/operator/crds/monitoring.grafana.com_podlogs.yaml @@ -6,7 +6,7 @@ metadata: annotations: controller-gen.kubebuilder.io/version: v0.6.2 creationTimestamp: null - name: pod-logs.monitoring.grafana.com + name: podlogs.monitoring.grafana.com spec: group: monitoring.grafana.com names: @@ -14,8 +14,8 @@ spec: - agent-operator kind: PodLogs listKind: PodLogsList - plural: pod-logs - singular: pod-logs + plural: podlogs + singular: podlogs scope: Namespaced versions: - name: v1alpha1 From 21663adbbfe1407f58f2fdd0122bdbf9518753f6 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Fri, 16 Jul 2021 12:33:13 -0400 Subject: [PATCH 2/9] changelog entry --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a440dc6ca8d1..42628a5064a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ - [FEATURE] Put Tests requiring Network Access behind a -online flag (@flokli) +- [FEATURE] Add logging support to the Grafana Agent Operator. (@rfratto) + - [ENHANCEMENT] The Grafana Agent Operator will now default to deploying the matching release version of the Grafana Agent instead of v0.14.0. (@rfratto) @@ -20,7 +22,7 @@ the $HOSTNAME variable in agent config. (@dfrankel33) - [BUGFIX] Regex capture groups like `${1}` will now be kept intact when - using `-config.expand-env`. + using `-config.expand-env`. (@rfratto) - [BUGFIX] The directory of the logs positions file will now properly be created on startup for all instances. From 4d1d1fc7d62c1a669d1cb90ff3de3b686238247a Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Fri, 16 Jul 2021 16:52:38 -0400 Subject: [PATCH 3/9] working example e2e --- Makefile | 2 +- cmd/agent-operator/agent-example-config.yaml | 55 ++++++- cmd/agent-operator/example-grafana.yaml | 135 ++++++++++++++++++ cmd/agent-operator/example-loki.yaml | 112 +++++++++++++++ pkg/logs/logs.go | 6 + .../apis/monitoring/v1alpha1/types_logs.go | 11 +- .../v1alpha1/zz_generated.deepcopy.go | 13 +- pkg/operator/clientutil/clientutil.go | 4 +- pkg/operator/config/config.go | 57 +++++++- pkg/operator/config/logs_templates_test.go | 18 ++- .../component/logs/pod_logs.libsonnet | 8 ++ .../templates/component/logs/stages.libsonnet | 9 +- .../config/templates/ext/marshal.libsonnet | 5 + pkg/operator/config/utils.go | 64 +++++++++ pkg/operator/resources_logs.go | 2 + pkg/operator/resources_metrics.go | 3 + .../crds/monitoring.grafana.com_podlogs.yaml | 11 +- 17 files changed, 479 insertions(+), 36 deletions(-) create mode 100644 cmd/agent-operator/example-grafana.yaml create mode 100644 cmd/agent-operator/example-loki.yaml diff --git a/Makefile b/Makefile index 767f0d96d3f5..d7e358659cb9 100644 --- a/Makefile +++ b/Makefile @@ -133,7 +133,7 @@ endif # CRDs # ######## -crds: +crds: build-image/.uptodate ifeq ($(BUILD_IN_CONTAINER),true) mkdir -p $(shell pwd)/.pkg mkdir -p $(shell pwd)/.cache diff --git a/cmd/agent-operator/agent-example-config.yaml b/cmd/agent-operator/agent-example-config.yaml index ef26b1d8b97e..3ddd1782b25a 100644 --- a/cmd/agent-operator/agent-example-config.yaml +++ b/cmd/agent-operator/agent-example-config.yaml @@ -6,7 +6,7 @@ metadata: labels: app: grafana-agent-example spec: - image: grafana/agent:v0.14.0 + image: grafana/agent:v0.18.0 logLevel: info serviceAccountName: grafana-agent storage: @@ -15,6 +15,10 @@ spec: resources: requests: storage: 1Gi + logs: + instanceSelector: + matchLabels: + agent: grafana-agent-example prometheus: instanceSelector: matchLabels: @@ -47,19 +51,56 @@ spec: --- +apiVersion: monitoring.grafana.com/v1alpha1 +kind: LogsInstance +metadata: + name: primary + namespace: default + labels: + agent: grafana-agent-example +spec: + clients: + - url: http://loki:8080/loki/api/v1/push + + # Supply an empty namespace selector to look in all namespaces. + podLogsNamespaceSelector: {} + podLogsSelector: + matchLabels: + instance: primary + +--- + +# Have the Agent monitor itself. apiVersion: monitoring.coreos.com/v1 -kind: ServiceMonitor +kind: PodMonitor +metadata: + name: grafana-agents + namespace: default + labels: + instance: primary +spec: + selector: + matchLabels: + app.kubernetes.io/name: grafana-agent + podMetricsEndpoints: + - port: http-metrics + +--- + +# Have the Agent get logs from itself. +apiVersion: monitoring.grafana.com/v1alpha1 +kind: PodLogs metadata: - name: kube-dns - namespace: kube-system + name: grafana-agents + namespace: default labels: instance: primary spec: selector: matchLabels: - k8s-app: kube-dns - endpoints: - - port: metrics + app.kubernetes.io/name: grafana-agent + pipelineStages: + - cri: {} # # Pretend credentials diff --git a/cmd/agent-operator/example-grafana.yaml b/cmd/agent-operator/example-grafana.yaml new file mode 100644 index 000000000000..84cb8bc76ba4 --- /dev/null +++ b/cmd/agent-operator/example-grafana.yaml @@ -0,0 +1,135 @@ +## example-grafana.yaml contains a tiny Grafana deployment used for testing the +## Grafana Agent Operator. + +--- + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: grafana + namespace: default + labels: + app: grafana +spec: + serviceName: grafana + replicas: 1 + selector: + matchLabels: + app: grafana + template: + metadata: + labels: + app: grafana + spec: + containers: + - name: loki + image: grafana/grafana:8.0.4 + args: + - --homepath=/usr/share/grafana + - --config=/etc/grafana-config/grafana.ini + ports: + - name: http + containerPort: 8080 + volumeMounts: + - mountPath: /etc/grafana/provisioning/datasources + name: grafana-datasources + readOnly: true + - mountPath: /etc/grafana-config + name: config + readOnly: true + volumes: + - name: config + configMap: + name: grafana-config + - name: grafana-datasources + configMap: + name: grafana-datasources + +--- + +apiVersion: v1 +kind: Service +metadata: + name: grafana + namespace: default + labels: + app: grafana +spec: + selector: + app: grafana + ports: + - name: http + port: 8080 + targetPort: 8080 + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-config + namespace: default + labels: + app: grafana +data: + grafana.ini: | + [server] + http_port = 8080 + root_url = http://grafana.k3d.localhost:30080/ + + [auth.anonymous] + enabled = true + org_role = "Admin" + + [analytics] + reporting_enabled = false + + [users] + default_theme = 'dark' + + [explore] + enabled = true + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-datasources + namespace: default + labels: + app: grafana +data: + loki.yml: | + apiVersion: 1 + datasources: + - name: loki + type: loki + access: proxy + url: http://loki.default.svc.cluster.local:8080 + isDefault: true + version: 1 + editable: false + jsonData: + httpMethod: GET + +--- + +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: grafana + namespace: default + labels: + app: grafana +spec: + rules: + - host: grafana.k3d.localhost + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: grafana + port: { name: 'http' } diff --git a/cmd/agent-operator/example-loki.yaml b/cmd/agent-operator/example-loki.yaml new file mode 100644 index 000000000000..f4c96920483d --- /dev/null +++ b/cmd/agent-operator/example-loki.yaml @@ -0,0 +1,112 @@ +## example-loki.yaml contains a tiny Loki deployment used for testing the +## Grafana Agent Operator. + +--- + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: loki + namespace: default + labels: + app: loki +spec: + serviceName: loki + replicas: 1 + selector: + matchLabels: + app: loki + template: + metadata: + labels: + app: loki + spec: + containers: + - name: loki + image: grafana/loki:2.2.1 + args: + - --config.file=/etc/loki/config.yml + - --server.http-listen-port=8080 + ports: + - name: http-metrics + containerPort: 9090 + volumeMounts: + - mountPath: /etc/loki + name: config + readOnly: true + - mountPath: /var/lib/data + name: data + volumes: + - name: config + configMap: + name: loki-config + - name: data + emptyDir: {} + +--- + +apiVersion: v1 +kind: Service +metadata: + name: loki + namespace: default + labels: + app: loki +spec: + selector: + app: loki + ports: + - name: http-metrics + port: 8080 + targetPort: 8080 + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: loki-config + namespace: default + labels: + app: loki +data: + config.yml: | + auth_enabled: false + server: + graceful_shutdown_timeout: 5m + http_server_idle_timeout: 120s + grpc_server_max_recv_msg_size: 25165824 + limits_config: + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 24h + ingester: + chunk_idle_period: 5m + chunk_retain_period: 30s + max_transfer_retries: 1 + lifecycler: + address: 127.0.0.1 + final_sleep: 0s + ring: + kvstore: + store: inmemory + replication_factor: 1 + schema_config: + configs: + - from: 2021-07-16 + store: boltdb + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + storage_config: + boltdb: + directory: /var/lib/data/index + filesystem: + directory: /var/lib/data/chunks + chunk_store_config: + max_look_back_period: 0 + table_manager: + retention_deletes_enabled: true + retention_period: 48h diff --git a/pkg/logs/logs.go b/pkg/logs/logs.go index 0f42cde910f2..ceb11ca3f88a 100644 --- a/pkg/logs/logs.go +++ b/pkg/logs/logs.go @@ -142,6 +142,12 @@ func (i *Instance) ApplyConfig(c *InstanceConfig) error { i.mut.Lock() defer i.mut.Unlock() + positionsDir := filepath.Dir(c.PositionsConfig.PositionsFile) + err := os.MkdirAll(positionsDir, 0700) + if err != nil { + level.Warn(i.log).Log("msg", "failed to create the positions directory. logs may be unable to save their position", "path", positionsDir, "err", err) + } + // No-op if the configs haven't changed. if util.CompareYAML(c, i.cfg) { level.Debug(i.log).Log("msg", "instance config hasn't changed, not recreating Promtail") diff --git a/pkg/operator/apis/monitoring/v1alpha1/types_logs.go b/pkg/operator/apis/monitoring/v1alpha1/types_logs.go index a93de26e3fec..33473eb798f5 100644 --- a/pkg/operator/apis/monitoring/v1alpha1/types_logs.go +++ b/pkg/operator/apis/monitoring/v1alpha1/types_logs.go @@ -363,7 +363,16 @@ type MatchStageSpec struct { // Nested set of pipeline stages to execute when action: keep and the log // line matches selector. - Stages []*PipelineStageSpec `json:"stages,omitempty"` + // + // An example value for stages may be: + // + // stages: | + // - json: {} + // - labelAllow: [foo, bar] + // + // Note that stages is a string and because SIG API Machinery does not support + // recursive types. Be careful not to mistype anything. + Stages string `json:"stages,omitempty"` } // MetricsStageSpec is an action stage that allows for defining and updating diff --git a/pkg/operator/apis/monitoring/v1alpha1/zz_generated.deepcopy.go b/pkg/operator/apis/monitoring/v1alpha1/zz_generated.deepcopy.go index cf31dc97cb87..dd4f42dda5d9 100644 --- a/pkg/operator/apis/monitoring/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/operator/apis/monitoring/v1alpha1/zz_generated.deepcopy.go @@ -463,17 +463,6 @@ func (in *LogsTargetConfigSpec) DeepCopy() *LogsTargetConfigSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MatchStageSpec) DeepCopyInto(out *MatchStageSpec) { *out = *in - if in.Stages != nil { - in, out := &in.Stages, &out.Stages - *out = make([]*PipelineStageSpec, len(*in)) - for i := range *in { - if (*in)[i] != nil { - in, out := &(*in)[i], &(*out)[i] - *out = new(PipelineStageSpec) - (*in).DeepCopyInto(*out) - } - } - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MatchStageSpec. @@ -624,7 +613,7 @@ func (in *PipelineStageSpec) DeepCopyInto(out *PipelineStageSpec) { if in.Match != nil { in, out := &in.Match, &out.Match *out = new(MatchStageSpec) - (*in).DeepCopyInto(*out) + **out = **in } if in.Metrics != nil { in, out := &in.Metrics, &out.Metrics diff --git a/pkg/operator/clientutil/clientutil.go b/pkg/operator/clientutil/clientutil.go index abeb9ce9f0e6..c3c6c1a9e960 100644 --- a/pkg/operator/clientutil/clientutil.go +++ b/pkg/operator/clientutil/clientutil.go @@ -107,7 +107,7 @@ func CreateOrUpdateStatefulSet(ctx context.Context, c client.Client, ss *apps_v1 ss.SetAnnotations(mergeMaps(ss.Annotations, exist.Annotations)) err := c.Update(ctx, ss) - if k8s_errors.IsNotAcceptable(err) { + if k8s_errors.IsNotAcceptable(err) || k8s_errors.IsInvalid(err) { err = c.Delete(ctx, ss) if err != nil { return fmt.Errorf("failed to update statefulset: deleting old statefulset: %w", err) @@ -144,7 +144,7 @@ func CreateOrUpdateDaemonSet(ctx context.Context, c client.Client, ss *apps_v1.D ss.SetAnnotations(mergeMaps(ss.Annotations, exist.Annotations)) err := c.Update(ctx, ss) - if k8s_errors.IsNotAcceptable(err) { + if k8s_errors.IsNotAcceptable(err) || k8s_errors.IsInvalid(err) { err = c.Delete(ctx, ss) if err != nil { return fmt.Errorf("failed to update daemonset: deleting old daemonset: %w", err) diff --git a/pkg/operator/config/config.go b/pkg/operator/config/config.go index e47936755e64..e33aa2841858 100644 --- a/pkg/operator/config/config.go +++ b/pkg/operator/config/config.go @@ -15,7 +15,7 @@ import ( grafana "github.com/grafana/agent/pkg/operator/apis/monitoring/v1alpha1" "github.com/grafana/agent/pkg/operator/assets" prom "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "gopkg.in/yaml.v2" + "gopkg.in/yaml.v3" ) // Type is the type of Agent deployment that a config is being generated @@ -159,6 +159,61 @@ func createVM(secrets assets.SecretStore) (*jsonnet.VM, error) { Params: ast.Identifiers{"text"}, Func: unmarshalYAML, }) + vm.NativeFunction(&jsonnet.NativeFunction{ + Name: "intoStages", + Params: ast.Identifiers{"text"}, + Func: func(i []interface{}) (interface{}, error) { + text, ok := i[0].(string) + if !ok { + return nil, jsonnet.RuntimeError{Msg: "text argument not string"} + } + + // The way this works is really bad. First we have to + // read in the raw YAML so we can convert it to JSON + // and get the underlying Spec. + var raw interface{} + if err := yaml.Unmarshal([]byte(text), &raw); err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + bb, err := json.Marshal(raw) + if err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + var ps []*grafana.PipelineStageSpec + if err := json.Unmarshal(bb, &ps); err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + // Then we need to convert each into their raw types. + rawPS := make([]interface{}, 0, len(ps)) + for _, stage := range ps { + bb, err := json.Marshal(structs.Map(stage)) + if err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + var v interface{} + if err := json.Unmarshal(bb, &v); err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + rawPS = append(rawPS, v) + } + return rawPS, nil + }, + }) vm.NativeFunction(&jsonnet.NativeFunction{ Name: "trimOptional", diff --git a/pkg/operator/config/logs_templates_test.go b/pkg/operator/config/logs_templates_test.go index 49b07c1dfc90..e0b98a0a9d3b 100644 --- a/pkg/operator/config/logs_templates_test.go +++ b/pkg/operator/config/logs_templates_test.go @@ -260,11 +260,11 @@ func TestLogsStages(t *testing.T) { Selector: `{app="pokey"}`, Action: "keep", DropCounterReason: "no_pokey", - Stages: []*gragent.PipelineStageSpec{{ - JSON: &gragent.JSONStageSpec{ - Expressions: map[string]string{"msg": "msg"}, - }, - }}, + Stages: util.Untab(` + - json: + expressions: + msg: msg + `), }, }}, expect: util.Untab(` @@ -498,6 +498,10 @@ func TestPodLogsConfig(t *testing.T) { target_label: container - target_label: job replacement: operator/podlogs + - source_labels: ['__meta_kubernetes_pod_uid', '__meta_kubernetes_pod_container_name'] + target_label: __path__ + separator: / + replacement: /var/log/pods/*$1/*.log `), }, } @@ -632,6 +636,10 @@ func TestLogsConfig(t *testing.T) { target_label: container - replacement: app/pod target_label: job + - source_labels: ['__meta_kubernetes_pod_uid', '__meta_kubernetes_pod_container_name'] + target_label: __path__ + separator: / + replacement: /var/log/pods/*$1/*.log `), }, { diff --git a/pkg/operator/config/templates/component/logs/pod_logs.libsonnet b/pkg/operator/config/templates/component/logs/pod_logs.libsonnet index 244c04c8ca7e..7ddb5c7e26fb 100644 --- a/pkg/operator/config/templates/component/logs/pod_logs.libsonnet +++ b/pkg/operator/config/templates/component/logs/pod_logs.libsonnet @@ -126,6 +126,14 @@ function( }, ]) + + // Kubernetes puts logs under subdirectories keyed pod UID and container_name. + [{ + source_labels: ['__meta_kubernetes_pod_uid', '__meta_kubernetes_pod_container_name'], + target_label: '__path__', + separator: '/', + replacement: '/var/log/pods/*$1/*.log', + }] + + std.map( function(c) new_relabel_config(c), k8s.array(podLogs.Spec.RelabelConfigs), diff --git a/pkg/operator/config/templates/component/logs/stages.libsonnet b/pkg/operator/config/templates/component/logs/stages.libsonnet index beb1a3dcb115..ffb0b15609cd 100644 --- a/pkg/operator/config/templates/component/logs/stages.libsonnet +++ b/pkg/operator/config/templates/component/logs/stages.libsonnet @@ -1,3 +1,4 @@ +local marshal = import 'ext/marshal.libsonnet'; local optionals = import 'ext/optionals.libsonnet'; // Creates a new stage. @@ -114,9 +115,11 @@ local new_stage = function(spec) { pipeline_name: optionals.string(spec.Match.PipelineName), action: optionals.string(spec.Match.Action), drop_counter_reason: optionals.string(spec.Match.DropCounterReason), - stages: std.map( - function(stage) new_stage(stage), - spec.Match.Stages, + stages: if spec.Match.Stages != '' then ( + std.map( + function(stage) new_stage(stage), + marshal.intoStages(spec.Match.Stages), + ) ), }, diff --git a/pkg/operator/config/templates/ext/marshal.libsonnet b/pkg/operator/config/templates/ext/marshal.libsonnet index 0b53f065d48b..da9870e0467e 100644 --- a/pkg/operator/config/templates/ext/marshal.libsonnet +++ b/pkg/operator/config/templates/ext/marshal.libsonnet @@ -4,4 +4,9 @@ // fromYAML unmarshals YAML text into an object. fromYAML(text):: std.native('unmarshalYAML')(text), + + // intoStages unmarshals YAML text into []*PipelineStageSpec. + // This is required because the "match" stage from Promtail is + // recursive and you can't define recrusive types in CRDs. + intoStages(text):: std.native('intoStages')(text), } diff --git a/pkg/operator/config/utils.go b/pkg/operator/config/utils.go index c5bc882ffb58..578056b1d34e 100644 --- a/pkg/operator/config/utils.go +++ b/pkg/operator/config/utils.go @@ -1,9 +1,13 @@ package config import ( + "encoding/json" + "fmt" "regexp" + "github.com/fatih/structs" jsonnet "github.com/google/go-jsonnet" + grafana "github.com/grafana/agent/pkg/operator/apis/monitoring/v1alpha1" "gopkg.in/yaml.v3" ) @@ -60,6 +64,66 @@ func trimSlice(s []interface{}) []interface{} { return res } +// intoStages converts the a yaml slice of stages into a Jsonnet array. +func intoStages(i []interface{}) (interface{}, error) { + text, ok := i[0].(string) + if !ok { + return nil, jsonnet.RuntimeError{Msg: "text argument not string"} + } + + // The way this works is really, really gross. We only need any of this + // because Kubernetes CRDs can't recursively define types, which we need + // for the match stage. + // + // 1. Convert YAML -> map[string]interface{} + // 2. Convert map[string]interface{} -> JSON + // 3. Convert JSON -> []*grafana.PipelineStageSpec + // 4. Convert []*grafana.PipelineStageSpec into []interface{}, where + // each interface{} has the type information lost so marshaling it + // again to JSON doesn't break anything. + var raw interface{} + if err := yaml.Unmarshal([]byte(text), &raw); err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + bb, err := json.Marshal(raw) + if err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + var ps []*grafana.PipelineStageSpec + if err := json.Unmarshal(bb, &ps); err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + // Then we need to convert each into their raw types. + rawPS := make([]interface{}, 0, len(ps)) + for _, stage := range ps { + bb, err := json.Marshal(structs.Map(stage)) + if err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + var v interface{} + if err := json.Unmarshal(bb, &v); err != nil { + return nil, jsonnet.RuntimeError{ + Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), + } + } + + rawPS = append(rawPS, v) + } + return rawPS, nil +} + var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) // SanitizeLabelName sanitizes a label name for Prometheus. diff --git a/pkg/operator/resources_logs.go b/pkg/operator/resources_logs.go index 2df31b422a79..d25f17a6327b 100644 --- a/pkg/operator/resources_logs.go +++ b/pkg/operator/resources_logs.go @@ -43,6 +43,7 @@ func generateLogsDaemonSet( labels[k] = v } labels[agentNameLabelName] = d.Agent.Name + labels[agentTypeLabel] = "logs" boolTrue := true ds := &apps_v1.DaemonSet{ @@ -216,6 +217,7 @@ func generateLogsDaemonSetSpec( "app.kubernetes.io/instance": d.Agent.Name, "grafana-agent": d.Agent.Name, agentNameLabelName: d.Agent.Name, + agentTypeLabel: "logs", } if d.Agent.Spec.PodMetadata != nil { for k, v := range d.Agent.Spec.PodMetadata.Labels { diff --git a/pkg/operator/resources_metrics.go b/pkg/operator/resources_metrics.go index 0d557371e25b..4377abfce558 100644 --- a/pkg/operator/resources_metrics.go +++ b/pkg/operator/resources_metrics.go @@ -29,6 +29,7 @@ var ( } shardLabelName = "operator.agent.grafana.com/shard" agentNameLabelName = "operator.agent.grafana.com/name" + agentTypeLabel = "operator.agent.grafana.com/type" probeTimeoutSeconds int32 = 3 ) @@ -118,6 +119,7 @@ func generateMetricsStatefulSet( labels[k] = v } labels[agentNameLabelName] = d.Agent.Name + labels[agentTypeLabel] = "metrics" boolTrue := true @@ -319,6 +321,7 @@ func generateMetricsStatefulSetSpec( "grafana-agent": d.Agent.Name, shardLabelName: fmt.Sprintf("%d", shard), agentNameLabelName: d.Agent.Name, + agentTypeLabel: "metrics", } if d.Agent.Spec.PodMetadata != nil { for k, v := range d.Agent.Spec.PodMetadata.Labels { diff --git a/production/operator/crds/monitoring.grafana.com_podlogs.yaml b/production/operator/crds/monitoring.grafana.com_podlogs.yaml index c40cb7ab6d2b..a11e31f11627 100644 --- a/production/operator/crds/monitoring.grafana.com_podlogs.yaml +++ b/production/operator/crds/monitoring.grafana.com_podlogs.yaml @@ -186,10 +186,13 @@ spec: Required. type: string stages: - description: 'Nested set of pipeline stages to execute when - action: keep and the log line matches selector.' - items: {} - type: array + description: "Nested set of pipeline stages to execute when + action: keep and the log line matches selector. \n An + example value for stages may be: \n stages: | - + json: {} - labelAllow: [foo, bar] \n Note that stages + is a string and because SIG API Machinery does not support + recursive types. Be careful not to mistype anything." + type: string required: - selector type: object From 8f342b3930adf8591a01cb7d01e8367fdffeef49 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Tue, 20 Jul 2021 13:54:24 -0400 Subject: [PATCH 4/9] undo docs changes until a future PR --- docs/operator/getting-started.md | 3 -- docs/operator/upgrade-guide.md | 61 -------------------------------- docs/upgrade-guide/_index.md | 4 +-- 3 files changed, 1 insertion(+), 67 deletions(-) delete mode 100644 docs/operator/upgrade-guide.md diff --git a/docs/operator/getting-started.md b/docs/operator/getting-started.md index 4787a2a5ac71..503ad0d7fdbf 100644 --- a/docs/operator/getting-started.md +++ b/docs/operator/getting-started.md @@ -82,8 +82,6 @@ rules: resources: - grafana-agents - prometheus-instances - - logs-instances - - podlogs verbs: [get, list, watch] - apiGroups: [monitoring.coreos.com] resources: @@ -103,7 +101,6 @@ rules: - apiGroups: ["apps"] resources: - statefulsets - - daemonsets verbs: [get, list, watch, create, update, patch, delete] --- diff --git a/docs/operator/upgrade-guide.md b/docs/operator/upgrade-guide.md deleted file mode 100644 index edbb64e8533e..000000000000 --- a/docs/operator/upgrade-guide.md +++ /dev/null @@ -1,61 +0,0 @@ -+++ -title = "Upgrade guide" -weight = 200 -+++ - -# Upgrade guide - -This guide describes all breaking changes that have happened in prior releases -and how to migrate to newer versions of the Grafana Agent Operator. For -upgrading the Grafana Agent, please refer to its -[upgrade guide]({{< relref "../upgrade-guide" >}}) instead. - -## Unreleased - -These changes will come in a future version. - -### RBAC additions for logging support - -Now that the Grafana Agent Operator supports logs, the RBAC rules used by -the operator must be extended to LogsInstances, PodLogs, and DaemonSets. - -Example new ClusterRole for the Operator to use: - -```yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: grafana-agent-operator -rules: -- apiGroups: [monitoring.grafana.com] - resources: - - grafana-agents - - prometheus-instances - - logs-instances - - podlogs - verbs: [get, list, watch] -- apiGroups: [monitoring.coreos.com] - resources: - - podmonitors - - probes - - servicemonitors - verbs: [get, list, watch] -- apiGroups: [""] - resources: - - namespaces - verbs: [get, list, watch] -- apiGroups: [""] - resources: - - secrets - - services - verbs: [get, list, watch, create, update, patch, delete] -- apiGroups: ["apps"] - resources: - - statefulsets - - daemonsets - verbs: [get, list, watch, create, update, patch, delete] -``` - -These RBAC permissions do not need to be given to the GrafanaAgent resource -itself, just the operator. The RBAC permissions recommended for the GrafanaAgent -resource for metrics already cover logging support. diff --git a/docs/upgrade-guide/_index.md b/docs/upgrade-guide/_index.md index 011ab0157cbb..423bd67b6031 100644 --- a/docs/upgrade-guide/_index.md +++ b/docs/upgrade-guide/_index.md @@ -6,9 +6,7 @@ weight = 200 # Upgrade guide This guide describes all breaking changes that have happened in prior -releases and how to migrate to newer versions. For upgrading the -Grafana Agent Operator, please refer to its -[upgrade guide]({{< relref "./operator/upgrade-guide" >}}) instead. +releases and how to migrate to newer versions. ## Unreleased From d33d79ca93805f4dd8b022f3f3370ed640d84fb7 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Tue, 20 Jul 2021 13:56:16 -0400 Subject: [PATCH 5/9] defer logs fix for a future PR --- pkg/logs/logs.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/logs/logs.go b/pkg/logs/logs.go index ceb11ca3f88a..b9d8a497b4ba 100644 --- a/pkg/logs/logs.go +++ b/pkg/logs/logs.go @@ -4,7 +4,6 @@ package logs import ( "fmt" "os" - "path/filepath" "sync" "time" @@ -58,6 +57,13 @@ func (l *Logs) ApplyConfig(c *Config) error { c = &Config{} } + if c.PositionsDirectory != "" { + err := os.MkdirAll(c.PositionsDirectory, 0700) + if err != nil { + level.Warn(l.l).Log("msg", "failed to create the positions directory. logs may be unable to save their position", "path", c.PositionsDirectory, "err", err) + } + } + newInstances := make(map[string]*Instance, len(c.Configs)) for _, ic := range c.Configs { @@ -142,12 +148,6 @@ func (i *Instance) ApplyConfig(c *InstanceConfig) error { i.mut.Lock() defer i.mut.Unlock() - positionsDir := filepath.Dir(c.PositionsConfig.PositionsFile) - err := os.MkdirAll(positionsDir, 0700) - if err != nil { - level.Warn(i.log).Log("msg", "failed to create the positions directory. logs may be unable to save their position", "path", positionsDir, "err", err) - } - // No-op if the configs haven't changed. if util.CompareYAML(c, i.cfg) { level.Debug(i.log).Log("msg", "instance config hasn't changed, not recreating Promtail") From 287a14a852bee96e1ef593157983df15604324cf Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Tue, 20 Jul 2021 14:05:21 -0400 Subject: [PATCH 6/9] remove duplicated intoStages code --- .../apis/monitoring/v1alpha1/types_logs.go | 5 +- pkg/operator/config/config.go | 52 +------------------ .../crds/monitoring.grafana.com_podlogs.yaml | 5 +- 3 files changed, 7 insertions(+), 55 deletions(-) diff --git a/pkg/operator/apis/monitoring/v1alpha1/types_logs.go b/pkg/operator/apis/monitoring/v1alpha1/types_logs.go index 33473eb798f5..1f81f247388d 100644 --- a/pkg/operator/apis/monitoring/v1alpha1/types_logs.go +++ b/pkg/operator/apis/monitoring/v1alpha1/types_logs.go @@ -370,8 +370,9 @@ type MatchStageSpec struct { // - json: {} // - labelAllow: [foo, bar] // - // Note that stages is a string and because SIG API Machinery does not support - // recursive types. Be careful not to mistype anything. + // Note that stages is a string because SIG API Machinery does not + // support recursive types, and so it cannot be validated for correctness. Be + // careful not to mistype anything. Stages string `json:"stages,omitempty"` } diff --git a/pkg/operator/config/config.go b/pkg/operator/config/config.go index e33aa2841858..6a2e4aecb027 100644 --- a/pkg/operator/config/config.go +++ b/pkg/operator/config/config.go @@ -162,57 +162,7 @@ func createVM(secrets assets.SecretStore) (*jsonnet.VM, error) { vm.NativeFunction(&jsonnet.NativeFunction{ Name: "intoStages", Params: ast.Identifiers{"text"}, - Func: func(i []interface{}) (interface{}, error) { - text, ok := i[0].(string) - if !ok { - return nil, jsonnet.RuntimeError{Msg: "text argument not string"} - } - - // The way this works is really bad. First we have to - // read in the raw YAML so we can convert it to JSON - // and get the underlying Spec. - var raw interface{} - if err := yaml.Unmarshal([]byte(text), &raw); err != nil { - return nil, jsonnet.RuntimeError{ - Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), - } - } - - bb, err := json.Marshal(raw) - if err != nil { - return nil, jsonnet.RuntimeError{ - Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), - } - } - - var ps []*grafana.PipelineStageSpec - if err := json.Unmarshal(bb, &ps); err != nil { - return nil, jsonnet.RuntimeError{ - Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), - } - } - - // Then we need to convert each into their raw types. - rawPS := make([]interface{}, 0, len(ps)) - for _, stage := range ps { - bb, err := json.Marshal(structs.Map(stage)) - if err != nil { - return nil, jsonnet.RuntimeError{ - Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), - } - } - - var v interface{} - if err := json.Unmarshal(bb, &v); err != nil { - return nil, jsonnet.RuntimeError{ - Msg: fmt.Sprintf("failed to unmarshal stages: %s", err.Error()), - } - } - - rawPS = append(rawPS, v) - } - return rawPS, nil - }, + Func: intoStages, }) vm.NativeFunction(&jsonnet.NativeFunction{ diff --git a/production/operator/crds/monitoring.grafana.com_podlogs.yaml b/production/operator/crds/monitoring.grafana.com_podlogs.yaml index a11e31f11627..cbafe844c410 100644 --- a/production/operator/crds/monitoring.grafana.com_podlogs.yaml +++ b/production/operator/crds/monitoring.grafana.com_podlogs.yaml @@ -190,8 +190,9 @@ spec: action: keep and the log line matches selector. \n An example value for stages may be: \n stages: | - json: {} - labelAllow: [foo, bar] \n Note that stages - is a string and because SIG API Machinery does not support - recursive types. Be careful not to mistype anything." + is a string because SIG API Machinery does not support + recursive types, and so it cannot be validated for correctness. + Be careful not to mistype anything." type: string required: - selector From b10bcf09df4d5f58d85c7752309bd4d431859052 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Thu, 22 Jul 2021 09:49:58 -0400 Subject: [PATCH 7/9] undo base rebase change --- pkg/logs/logs.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pkg/logs/logs.go b/pkg/logs/logs.go index b9d8a497b4ba..0f42cde910f2 100644 --- a/pkg/logs/logs.go +++ b/pkg/logs/logs.go @@ -4,6 +4,7 @@ package logs import ( "fmt" "os" + "path/filepath" "sync" "time" @@ -57,13 +58,6 @@ func (l *Logs) ApplyConfig(c *Config) error { c = &Config{} } - if c.PositionsDirectory != "" { - err := os.MkdirAll(c.PositionsDirectory, 0700) - if err != nil { - level.Warn(l.l).Log("msg", "failed to create the positions directory. logs may be unable to save their position", "path", c.PositionsDirectory, "err", err) - } - } - newInstances := make(map[string]*Instance, len(c.Configs)) for _, ic := range c.Configs { From 45364f423b0245db612b00139a518b591f06c9c9 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Thu, 22 Jul 2021 09:50:31 -0400 Subject: [PATCH 8/9] undo accidental commit --- pkg/operator/operator_test.go | 47 ----------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 pkg/operator/operator_test.go diff --git a/pkg/operator/operator_test.go b/pkg/operator/operator_test.go deleted file mode 100644 index 7362879e22c9..000000000000 --- a/pkg/operator/operator_test.go +++ /dev/null @@ -1,47 +0,0 @@ -package operator - -import ( - "fmt" - "testing" - - gragent "github.com/grafana/agent/pkg/operator/apis/monitoring/v1alpha1" - "k8s.io/apimachinery/pkg/api/meta" - meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" -) - -func TestOperator(t *testing.T) { - instList := &gragent.GrafanaAgentList{ - TypeMeta: meta_v1.TypeMeta{ - Kind: "GrafanaAgentList", - APIVersion: gragent.SchemeGroupVersion.Identifier(), - }, - ListMeta: meta_v1.ListMeta{}, - Items: []*gragent.GrafanaAgent{ - {ObjectMeta: meta_v1.ObjectMeta{Name: "a"}}, - {ObjectMeta: meta_v1.ObjectMeta{Name: "b"}}, - }, - } - _ = instList - - meta.EachListItem(instList, func(o runtime.Object) error { - fmt.Printf("%#v\n", o) - return nil - }) - - /* - data, err := runtime.DefaultUnstructuredConverter.ToUnstructured(instList) - require.NoError(t, err) - us := unstructured.Unstructured{Object: data} - - _ = us.EachListItem(func(o runtime.Object) error { - us := o.(*unstructured.Unstructured) - - var a gragent.GrafanaAgent - runtime.DefaultUnstructuredConverter.FromUnstructured(us.Object, &a) - fmt.Printf("%#v\n", a) - return nil - }) - */ - -} From f76643a3990c4be575ff6e5fe6edcdf14a95e1ae Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Thu, 19 Aug 2021 14:03:53 -0400 Subject: [PATCH 9/9] review feedback --- cmd/agent-operator/agent-example-config.yaml | 2 +- cmd/agent-operator/example-grafana.yaml | 2 +- pkg/operator/clientutil/clientutil.go | 4 ++-- pkg/operator/config/templates/ext/marshal.libsonnet | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmd/agent-operator/agent-example-config.yaml b/cmd/agent-operator/agent-example-config.yaml index 3ddd1782b25a..391fc6abba01 100644 --- a/cmd/agent-operator/agent-example-config.yaml +++ b/cmd/agent-operator/agent-example-config.yaml @@ -6,7 +6,7 @@ metadata: labels: app: grafana-agent-example spec: - image: grafana/agent:v0.18.0 + image: grafana/agent:v0.18.2 logLevel: info serviceAccountName: grafana-agent storage: diff --git a/cmd/agent-operator/example-grafana.yaml b/cmd/agent-operator/example-grafana.yaml index 84cb8bc76ba4..47ceda7ae1a0 100644 --- a/cmd/agent-operator/example-grafana.yaml +++ b/cmd/agent-operator/example-grafana.yaml @@ -22,7 +22,7 @@ spec: app: grafana spec: containers: - - name: loki + - name: grafana image: grafana/grafana:8.0.4 args: - --homepath=/usr/share/grafana diff --git a/pkg/operator/clientutil/clientutil.go b/pkg/operator/clientutil/clientutil.go index c3c6c1a9e960..f57b2af1b46e 100644 --- a/pkg/operator/clientutil/clientutil.go +++ b/pkg/operator/clientutil/clientutil.go @@ -110,11 +110,11 @@ func CreateOrUpdateStatefulSet(ctx context.Context, c client.Client, ss *apps_v1 if k8s_errors.IsNotAcceptable(err) || k8s_errors.IsInvalid(err) { err = c.Delete(ctx, ss) if err != nil { - return fmt.Errorf("failed to update statefulset: deleting old statefulset: %w", err) + return fmt.Errorf("failed to update statefulset when deleting old statefulset: %w", err) } err = c.Create(ctx, ss) if err != nil { - return fmt.Errorf("failed to update statefulset: creating new statefulset: %w", err) + return fmt.Errorf("failed to update statefulset when creating replacement statefulset: %w", err) } } else if err != nil { return fmt.Errorf("failed to update statefulset: %w", err) diff --git a/pkg/operator/config/templates/ext/marshal.libsonnet b/pkg/operator/config/templates/ext/marshal.libsonnet index da9870e0467e..3ad5106b6f32 100644 --- a/pkg/operator/config/templates/ext/marshal.libsonnet +++ b/pkg/operator/config/templates/ext/marshal.libsonnet @@ -7,6 +7,6 @@ // intoStages unmarshals YAML text into []*PipelineStageSpec. // This is required because the "match" stage from Promtail is - // recursive and you can't define recrusive types in CRDs. + // recursive and you can't define recursive types in CRDs. intoStages(text):: std.native('intoStages')(text), }