From 5cacfaf7a1fa7ac11c54ad1fa67cf13afccca646 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Thu, 19 Apr 2018 23:32:49 +0800 Subject: [PATCH 1/9] Add driver.docker counter metric for OOM Killer events --- client/driver/docker.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/client/driver/docker.go b/client/driver/docker.go index b0b23bea522d..f459a63553f0 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -35,6 +35,7 @@ import ( shelpers "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/nomad/structs" "github.com/mitchellh/mapstructure" + "github.com/armon/go-metrics" ) var ( @@ -1924,6 +1925,22 @@ func (h *DockerHandle) run() { h.logger.Printf("[ERR] driver.docker: failed to inspect container %s: %v", h.containerID, ierr) } else if container.State.OOMKilled { werr = fmt.Errorf("OOM Killed") + labels := []metrics.Label { + { + Name: "Image", + Value: h.Image, + }, + { + Name: "ImageID", + Value: h.ImageID, + }, + { + Name: "ContainerID", + Value: h.containerID, + }, + + } + metrics.IncrCounterWithLabels([]string{"driver", "docker","oom"},1,labels) } close(h.doneCh) From 54e178851be107940b27d54378b78139846c6599 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Fri, 20 Apr 2018 12:10:20 +0800 Subject: [PATCH 2/9] Run goimports --- client/driver/docker.go | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index f459a63553f0..53e1c99f7650 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -17,12 +17,13 @@ import ( "time" "github.com/armon/circbuf" - docker "github.com/fsouza/go-dockerclient" + "github.com/fsouza/go-dockerclient" "github.com/docker/docker/cli/config/configfile" "github.com/docker/docker/reference" "github.com/docker/docker/registry" + "github.com/armon/go-metrics" "github.com/hashicorp/go-multierror" "github.com/hashicorp/go-plugin" "github.com/hashicorp/nomad/client/allocdir" @@ -35,7 +36,6 @@ import ( shelpers "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/nomad/structs" "github.com/mitchellh/mapstructure" - "github.com/armon/go-metrics" ) var ( @@ -1925,22 +1925,21 @@ func (h *DockerHandle) run() { h.logger.Printf("[ERR] driver.docker: failed to inspect container %s: %v", h.containerID, ierr) } else if container.State.OOMKilled { werr = fmt.Errorf("OOM Killed") - labels := []metrics.Label { + labels := []metrics.Label{ { - Name: "Image", - Value: h.Image, + Name: "Image", + Value: h.Image, }, { - Name: "ImageID", - Value: h.ImageID, + Name: "ImageID", + Value: h.ImageID, }, { - Name: "ContainerID", - Value: h.containerID, + Name: "ContainerID", + Value: h.containerID, }, - } - metrics.IncrCounterWithLabels([]string{"driver", "docker","oom"},1,labels) + metrics.IncrCounterWithLabels([]string{"driver", "docker", "oom"}, 1, labels) } close(h.doneCh) From feeee1b0370d5f142aa6a22a59c5bae6ae40a2b9 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Fri, 4 May 2018 14:01:26 +0800 Subject: [PATCH 3/9] Add Job, taskgroupname, taskname, and allocid to the DockerHandle struct --- client/driver/docker.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/client/driver/docker.go b/client/driver/docker.go index 53e1c99f7650..ad7a1f5574d5 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -479,6 +479,10 @@ type DockerHandle struct { client *docker.Client waitClient *docker.Client logger *log.Logger + jobName string + taskGroupName string + taskName string + allocID string Image string ImageID string containerID string From 8bc4eb261d69e6cfe9d61818385847e51cc97e99 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Fri, 4 May 2018 14:02:19 +0800 Subject: [PATCH 4/9] Initialize dockerhandle with jobname, taskgroupname, taskname and allocid --- client/driver/docker.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/client/driver/docker.go b/client/driver/docker.go index ad7a1f5574d5..1d841113b3ab 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -903,6 +903,10 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (*StartRespon executor: exec, pluginClient: pluginClient, logger: d.logger, + jobName: d.DriverContext.jobName, + taskGroupName: d.DriverContext.taskGroupName, + taskName: d.DriverContext.taskName, + allocID: d.DriverContext.allocID, Image: d.driverConfig.ImageName, ImageID: d.imageID, containerID: container.ID, From e7bd558fe96adda6b91e52e7cbe11876097b685a Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Fri, 4 May 2018 14:02:34 +0800 Subject: [PATCH 5/9] Update counter driver.docker.oom labels --- client/driver/docker.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index 1d841113b3ab..7cc9e8c45e66 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -1935,16 +1935,20 @@ func (h *DockerHandle) run() { werr = fmt.Errorf("OOM Killed") labels := []metrics.Label{ { - Name: "Image", - Value: h.Image, + Name: "JobName", + Value: h.jobName, }, { - Name: "ImageID", - Value: h.ImageID, + Name: "TaskGroupName", + Value: h.taskGroupName, }, { - Name: "ContainerID", - Value: h.containerID, + Name: "TaskName", + Value: h.taskName, + }, + { + Name: "AllocID", + Value: h.allocID, }, } metrics.IncrCounterWithLabels([]string{"driver", "docker", "oom"}, 1, labels) From caeb596e05fa093296582eb3dfa77f29eec9d53e Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Mon, 21 May 2018 20:30:56 +0200 Subject: [PATCH 6/9] Remove allocid label from driver.docker.oom counter metric --- client/driver/docker.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index 7cc9e8c45e66..015099ae1352 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -1946,10 +1946,6 @@ func (h *DockerHandle) run() { Name: "TaskName", Value: h.taskName, }, - { - Name: "AllocID", - Value: h.allocID, - }, } metrics.IncrCounterWithLabels([]string{"driver", "docker", "oom"}, 1, labels) } From a5166e9676d6f072fc9f4af6c6fac409accb6f78 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Mon, 21 May 2018 20:32:50 +0200 Subject: [PATCH 7/9] Rename labels job, task_group and task --- client/driver/docker.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index 015099ae1352..2eb472e134ed 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -1935,15 +1935,15 @@ func (h *DockerHandle) run() { werr = fmt.Errorf("OOM Killed") labels := []metrics.Label{ { - Name: "JobName", + Name: "job", Value: h.jobName, }, { - Name: "TaskGroupName", + Name: "task_group", Value: h.taskGroupName, }, { - Name: "TaskName", + Name: "task", Value: h.taskName, }, } From 48c5093bd2d95e74b63d0648ea686e00fc329bdf Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Mon, 21 May 2018 20:33:01 +0200 Subject: [PATCH 8/9] Remove allocID from dockerhandle struct --- client/driver/docker.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/client/driver/docker.go b/client/driver/docker.go index 2eb472e134ed..1e5dcf26daa7 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -482,7 +482,6 @@ type DockerHandle struct { jobName string taskGroupName string taskName string - allocID string Image string ImageID string containerID string @@ -906,7 +905,6 @@ func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (*StartRespon jobName: d.DriverContext.jobName, taskGroupName: d.DriverContext.taskGroupName, taskName: d.DriverContext.taskName, - allocID: d.DriverContext.allocID, Image: d.driverConfig.ImageName, ImageID: d.imageID, containerID: container.ID, From ae0b3d9ffa3de327e021362fe0c5f554a8435f13 Mon Sep 17 00:00:00 2001 From: Jesus Vazquez Date: Mon, 21 May 2018 20:37:18 +0200 Subject: [PATCH 9/9] Add job, task, taskgroup to open method --- client/driver/docker.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/client/driver/docker.go b/client/driver/docker.go index 1e5dcf26daa7..ae4465c28fba 100644 --- a/client/driver/docker.go +++ b/client/driver/docker.go @@ -1778,6 +1778,9 @@ func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, er executor: exec, pluginClient: pluginClient, logger: d.logger, + jobName: d.DriverContext.jobName, + taskGroupName: d.DriverContext.taskGroupName, + taskName: d.DriverContext.taskName, Image: pid.Image, ImageID: pid.ImageID, containerID: pid.ContainerID,