diff --git a/CHANGELOG.md b/CHANGELOG.md index c1b969557..8a7e27f09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ### Added +- [GH-1232](https://github.com/coreos/etcd-operator/pull/1232) the operator can now log critical actions like pod creation/deletion to a user-specified path via the optional flag `debug-logfile-path`. The logs will only be generated if the cluster is self-hosted and the flag is set. This can be used in conjunction with a persistent volume to persist the critical actions to disk for later inspection. + ### Changed ### Removed diff --git a/cmd/operator/main.go b/cmd/operator/main.go index a671de6fe..7bc083d61 100644 --- a/cmd/operator/main.go +++ b/cmd/operator/main.go @@ -28,6 +28,7 @@ import ( "github.com/coreos/etcd-operator/pkg/backup/s3/s3config" "github.com/coreos/etcd-operator/pkg/chaos" "github.com/coreos/etcd-operator/pkg/controller" + "github.com/coreos/etcd-operator/pkg/debug" "github.com/coreos/etcd-operator/pkg/garbagecollection" "github.com/coreos/etcd-operator/pkg/util/constants" "github.com/coreos/etcd-operator/pkg/util/k8sutil" @@ -72,6 +73,7 @@ var ( func init() { flag.BoolVar(&analyticsEnabled, "analytics", true, "Send analytical event (Cluster Created/Deleted etc.) 
to Google Analytics") + flag.StringVar(&debug.DebugFilePath, "debug-logfile-path", "", "only for a self hosted cluster, the path where the debug logfile will be written, recommended to be under: /var/tmp/etcd-operator/debug/ to avoid any issue with lack of write permissions") flag.StringVar(&pvProvisioner, "pv-provisioner", constants.PVProvisionerGCEPD, "persistent volume provisioner type") flag.StringVar(&awsSecret, "backup-aws-secret", "", diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 97c9b4321..7cfd23077 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -25,6 +25,7 @@ import ( "time" "github.com/coreos/etcd-operator/pkg/backup/s3/s3config" + "github.com/coreos/etcd-operator/pkg/debug" "github.com/coreos/etcd-operator/pkg/garbagecollection" "github.com/coreos/etcd-operator/pkg/spec" "github.com/coreos/etcd-operator/pkg/util/etcdutil" @@ -66,6 +67,8 @@ type Config struct { type Cluster struct { logger *logrus.Entry + // debug logger for self hosted cluster + debugLogger *debug.DebugLogger config Config @@ -93,14 +96,20 @@ type Cluster struct { func New(config Config, cl *spec.Cluster, stopC <-chan struct{}, wg *sync.WaitGroup) *Cluster { lg := logrus.WithField("pkg", "cluster").WithField("cluster-name", cl.Metadata.Name) + var debugLogger *debug.DebugLogger + if cl.Spec.SelfHosted != nil { + debugLogger = debug.New(cl.Metadata.Name) + } + c := &Cluster{ - logger: lg, - config: config, - cluster: cl, - eventCh: make(chan *clusterEvent, 100), - stopCh: make(chan struct{}), - status: cl.Status.Copy(), - gc: garbagecollection.New(config.KubeCli, cl.Metadata.Namespace), + logger: lg, + debugLogger: debugLogger, + config: config, + cluster: cl, + eventCh: make(chan *clusterEvent, 100), + stopCh: make(chan struct{}), + status: cl.Status.Copy(), + gc: garbagecollection.New(config.KubeCli, cl.Metadata.Namespace), } wg.Add(1) @@ -481,6 +490,12 @@ func (c *Cluster) removePod(name string) error { if 
!k8sutil.IsKubernetesResourceNotFoundError(err) { return err } + if c.IsDebugLoggerEnabled() { + c.debugLogger.LogMessage(fmt.Sprintf("pod (%s) not found while trying to delete it", name)) + } + } + if c.IsDebugLoggerEnabled() { + c.debugLogger.LogPodDeletion(name) } return nil }
@@ -631,4 +646,14 @@ func (c *Cluster) logSpecUpdate(newSpec spec.ClusterSpec) {
 	for _, m := range strings.Split(string(newSpecBytes), "\n") {
 		c.logger.Info(m)
 	}
+
+	if c.IsDebugLoggerEnabled() {
+		c.debugLogger.LogClusterSpecUpdate(string(oldSpecBytes), string(newSpecBytes))
+	}
+}
+
+// IsDebugLoggerEnabled reports whether debug logging applies to this cluster:
+// its spec must be self hosted and a debug logger must have been created.
+func (c *Cluster) IsDebugLoggerEnabled() bool {
+	return c.cluster.Spec.SelfHosted != nil && c.debugLogger != nil
 }
diff --git a/pkg/cluster/self_hosted.go b/pkg/cluster/self_hosted.go index 045b19d85..cecd202c8 100644 --- a/pkg/cluster/self_hosted.go +++ b/pkg/cluster/self_hosted.go @@ -135,6 +135,10 @@ func (c *Cluster) addOneSelfHostedMember() error { if err != nil { return err } + if c.IsDebugLoggerEnabled() { + c.debugLogger.LogPodCreation(pod) + } + // wait for the new pod to start and add itself into the etcd cluster. 
oldN := c.members.Size() err = c.waitNewMember(oldN, 6, newMember.Name) @@ -158,6 +162,9 @@ func (c *Cluster) newSelfHostedSeedMember() error { if err != nil { return err } + if c.IsDebugLoggerEnabled() { + c.debugLogger.LogPodCreation(pod) + } c.logger.Infof("self-hosted cluster created with seed member (%s)", newMember.Name) return nil @@ -195,6 +202,9 @@ func (c *Cluster) migrateBootMember() error { if err != nil { return err } + if c.IsDebugLoggerEnabled() { + c.debugLogger.LogPodCreation(pod) + } if c.cluster.Spec.SelfHosted.SkipBootMemberRemoval { c.logger.Infof("skipping boot member (%s) removal; you will need to remove it yourself", endpoint)
diff --git a/pkg/debug/debug_logger.go b/pkg/debug/debug_logger.go
new file mode 100644
index 000000000..29a687a65
--- /dev/null
+++ b/pkg/debug/debug_logger.go
@@ -0,0 +1,105 @@
+// Copyright 2017 The etcd-operator Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package debug provides an optional file logger that records critical cluster
+// actions (pod creation and deletion, spec updates) for self hosted clusters.
+package debug
+
+import (
+	"os"
+	"path/filepath"
+
+	"github.com/coreos/etcd-operator/pkg/util/k8sutil"
+
+	"github.com/Sirupsen/logrus"
+	"k8s.io/client-go/pkg/api/v1"
+)
+
+var (
+	// DebugFilePath is the path of the debug log file. It must be set to enable
+	// debug logging: New returns nil while it is empty.
+	DebugFilePath string
+)
+
+// DebugLogger writes debugging information for a self hosted cluster to a file.
+type DebugLogger struct {
+	// entry on the standard logrus logger, tagged with pkg=debug
+	logger *logrus.Entry
+	// log to file for debugging self hosted clusters
+	fileLogger *logrus.Logger
+}
+
+// New returns a DebugLogger that appends to the file at DebugFilePath, creating
+// the parent directory and the file when they do not exist. It returns nil when
+// DebugFilePath is empty, or when the directory or file cannot be created or
+// opened; those failures are logged through the standard logrus logger.
+func New(clusterName string) *DebugLogger {
+	if len(DebugFilePath) == 0 {
+		return nil
+	}
+
+	logger := logrus.WithField("pkg", "debug")
+	// DebugFilePath is an OS path, so use filepath rather than the slash-only path package.
+	logDir := filepath.Dir(DebugFilePath)
+	if err := os.MkdirAll(logDir, 0755); err != nil {
+		logger.Errorf("Could not create debug log directory (%v), debug logging will not be performed: %v", logDir, err)
+		return nil
+	}
+
+	// The file is never closed here; it stays open for as long as the logger is used.
+	logFile, err := os.OpenFile(DebugFilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
+	if err != nil {
+		logger.Errorf("failed to open debug log file(%v): %v", DebugFilePath, err)
+		return nil
+	}
+
+	l := logrus.New()
+	l.Out = logFile
+	l.Infof("Starting debug logs for self-hosted etcd cluster: %v", clusterName)
+	return &DebugLogger{
+		logger:     logger,
+		fileLogger: l,
+	}
+}
+
+// LogPodCreation records the creation of pod together with its spec rendered as
+// JSON. If the spec cannot be rendered, the failure and the creation are still
+// recorded, just without the spec.
+func (dl *DebugLogger) LogPodCreation(pod *v1.Pod) {
+	podSpec, err := k8sutil.PodSpecToPrettyJSON(pod)
+	if err != nil {
+		dl.fileLogger.Infof("failed to get readable spec for pod(%v): %v ", pod.Name, err)
+		// Keep a record of the creation itself instead of printing an empty spec.
+		dl.fileLogger.Infof("created pod (%s)", pod.Name)
+		return
+	}
+	dl.fileLogger.Infof("created pod (%s) with spec: %s\n", pod.Name, podSpec)
+}
+
+// LogPodDeletion records that the pod named podName was deleted.
+func (dl *DebugLogger) LogPodDeletion(podName string) {
+	dl.fileLogger.Infof("deleted pod (%s)", podName)
+}
+
+// LogClusterSpecUpdate records a cluster spec update, given the old and new specs
+// already rendered as strings.
+func (dl *DebugLogger) LogClusterSpecUpdate(oldSpec, newSpec string) {
+	dl.fileLogger.Infof("spec update: \nOld:\n%v \nNew:\n%v\n", oldSpec, newSpec)
+}
+
+// LogMessage records msg as is.
+func (dl *DebugLogger) LogMessage(msg string) {
+	// Info, not Infof: msg is data, and any '%' in it must not be read as a format verb.
+	dl.fileLogger.Info(msg)
+}
diff --git a/pkg/util/k8sutil/pod_util.go b/pkg/util/k8sutil/pod_util.go index 86098cfff..d05d38a65 100644 --- a/pkg/util/k8sutil/pod_util.go +++ 
b/pkg/util/k8sutil/pod_util.go @@ -15,6 +15,7 @@ package k8sutil import ( + "encoding/json" "fmt" "github.com/coreos/etcd-operator/pkg/spec"
@@ -168,3 +169,12 @@ func getPodReadyCondition(status *v1.PodStatus) *v1.PodCondition {
 	}
 	return nil
 }
+
+// PodSpecToPrettyJSON renders the spec of the given pod as indented JSON.
+func PodSpecToPrettyJSON(pod *v1.Pod) (string, error) {
+	encoded, err := json.MarshalIndent(pod.Spec, "", " ")
+	if err != nil {
+		return "", err
+	}
+	return string(encoded), nil
+}