Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docker: periodically reconcile containers #6325

Merged
merged 9 commits into from
Oct 18, 2019
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions drivers/docker/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,25 @@ var (
Name: pluginName,
}

danglingContainersBlock = hclspec.NewObject(map[string]*hclspec.Spec{
"enabled": hclspec.NewDefault(
hclspec.NewAttr("enabled", "bool", false),
hclspec.NewLiteral(`true`),
),
"period": hclspec.NewDefault(
hclspec.NewAttr("period", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"creation_grace": hclspec.NewDefault(
hclspec.NewAttr("creation_grace", "string", false),
hclspec.NewLiteral(`"5m"`),
),
"dry_run": hclspec.NewDefault(
hclspec.NewAttr("dry_run", "bool", false),
hclspec.NewLiteral(`false`),
),
})

// configSpec is the hcl specification returned by the ConfigSchema RPC
// and is used to parse the contents of the 'plugin "docker" {...}' block.
// Example:
Expand Down Expand Up @@ -195,6 +214,10 @@ var (
hclspec.NewAttr("container", "bool", false),
hclspec.NewLiteral("true"),
),
"dangling_containers": hclspec.NewDefault(
hclspec.NewBlock("dangling_containers", false, danglingContainersBlock),
hclspec.NewLiteral("{}"),
),
})), hclspec.NewLiteral(`{
image = true
container = true
Expand Down Expand Up @@ -491,6 +514,28 @@ type DockerVolumeDriverConfig struct {
Options hclutils.MapStrStr `codec:"options"`
}

// ContainerGCConfig controls the behavior of the GC reconciler to detects
// dangling nomad containers that aren't tracked due to docker/nomad bugs
type ContainerGCConfig struct {
notnoop marked this conversation as resolved.
Show resolved Hide resolved
// Enabled controls whether container reconciler is enabled
Enabled bool `codec:"enabled"`

// DryRun indicates that reconciler should log unexpectedly running containers
// if found without actually killing them
DryRun bool `codec:"dry_run"`

// PeriodStr controls the frequency of scanning containers
PeriodStr string `codec:"period"`
period time.Duration `codec:"-"`

// CreationGraceStr is the duration allowed for a newly created container
// to live without being registered as a running task in nomad.
// A container is treated as leaked if it lived more than grace duration
// and haven't been registered in tasks.
CreationGraceStr string `codec:"creation_grace"`
CreationGrace time.Duration `codec:"-"`
}

type DriverConfig struct {
Endpoint string `codec:"endpoint"`
Auth AuthConfig `codec:"auth"`
Expand Down Expand Up @@ -519,6 +564,8 @@ type GCConfig struct {
ImageDelay string `codec:"image_delay"`
imageDelayDuration time.Duration `codec:"-"`
Container bool `codec:"container"`

DanglingContainers ContainerGCConfig `codec:"dangling_containers"`
}

type VolumeConfig struct {
Expand All @@ -534,6 +581,8 @@ func (d *Driver) ConfigSchema() (*hclspec.Spec, error) {
return configSpec, nil
}

const danglingContainersCreationGraceMinimum = 1 * time.Minute

func (d *Driver) SetConfig(c *base.Config) error {
var config DriverConfig
if len(c.PluginConfig) != 0 {
Expand All @@ -551,6 +600,25 @@ func (d *Driver) SetConfig(c *base.Config) error {
d.config.GC.imageDelayDuration = dur
}

if len(d.config.GC.DanglingContainers.PeriodStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.PeriodStr)
if err != nil {
return fmt.Errorf("failed to parse 'period' duration: %v", err)
}
d.config.GC.DanglingContainers.period = dur
notnoop marked this conversation as resolved.
Show resolved Hide resolved
}

if len(d.config.GC.DanglingContainers.CreationGraceStr) > 0 {
dur, err := time.ParseDuration(d.config.GC.DanglingContainers.CreationGraceStr)
if err != nil {
return fmt.Errorf("failed to parse 'creation_grace' duration: %v", err)
}
if dur < danglingContainersCreationGraceMinimum {
return fmt.Errorf("creation_grace is less than minimum, %v", danglingContainersCreationGraceMinimum)
}
d.config.GC.DanglingContainers.CreationGrace = dur
}

if c.AgentConfig != nil {
d.clientConfig = c.AgentConfig.Driver
}
Expand All @@ -568,6 +636,9 @@ func (d *Driver) SetConfig(c *base.Config) error {

d.coordinator = newDockerCoordinator(coordinatorConfig)

reconciler := newReconciler(d)
reconciler.Start()

return nil
}

Expand Down
23 changes: 22 additions & 1 deletion drivers/docker/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ var (
nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
)

const (
dockerLabelTaskID = "com.hashicorp.nomad.task_id"
dockerLabelTaskName = "com.hashicorp.nomad.task_name"
dockerLabelAllocID = "com.hashicorp.nomad.alloc_id"
dockerLabelJobName = "com.hashicorp.nomad.job_name"
)

type Driver struct {
// eventer is used to handle multiplexing of TaskEvents calls such that an
// event can be broadcast to all callers
Expand Down Expand Up @@ -309,6 +316,10 @@ CREATE:
// the container is started
runningContainer, err := client.InspectContainer(container.ID)
if err != nil {
client.RemoveContainer(docker.RemoveContainerOptions{
schmichael marked this conversation as resolved.
Show resolved Hide resolved
ID: container.ID,
Force: true,
})
msg := "failed to inspect started container"
d.logger.Error(msg, "error", err)
client.RemoveContainer(docker.RemoveContainerOptions{
Expand Down Expand Up @@ -977,9 +988,19 @@ func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *T

if len(driverConfig.Labels) > 0 {
config.Labels = driverConfig.Labels
logger.Debug("applied labels on the container", "labels", config.Labels)
}

config.Labels = map[string]string{
dockerLabelTaskID: task.ID,
dockerLabelTaskName: task.Name,
dockerLabelAllocID: task.AllocID,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were we just going to ship this label by default initially? I think we can ask users to pay the cost of at least 1 label, but I wasn't sure where we landed on more expanded labels.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done - kept dockerLabelAllocID and removed others.

dockerLabelJobName: task.JobName,
}
for k, v := range driverConfig.Labels {
config.Labels[k] = v
notnoop marked this conversation as resolved.
Show resolved Hide resolved
}
logger.Debug("applied labels on the container", "labels", config.Labels)

config.Env = task.EnvList()

containerName := fmt.Sprintf("%s-%s", strings.Replace(task.Name, "/", "_", -1), task.AllocID)
Expand Down
35 changes: 34 additions & 1 deletion drivers/docker/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -905,7 +905,8 @@ func TestDockerDriver_Labels(t *testing.T) {
t.Fatalf("err: %v", err)
}

require.Equal(t, 2, len(container.Config.Labels))
// expect to see 4 additional standard labels
require.Equal(t, len(cfg.Labels)+4, len(container.Config.Labels))
for k, v := range cfg.Labels {
require.Equal(t, v, container.Config.Labels[k])
}
Expand Down Expand Up @@ -1008,6 +1009,38 @@ func TestDockerDriver_CreateContainerConfig(t *testing.T) {
require.Equal(t, containerName, c.Name)
}

func TestDockerDriver_CreateContainerConfig_Labels(t *testing.T) {
t.Parallel()

task, cfg, _ := dockerTask(t)
task.AllocID = uuid.Generate()
task.JobName = "redis-demo-job"

cfg.Labels = map[string]string{
"user_label": "user_value",
}

require.NoError(t, task.EncodeConcreteDriverConfig(cfg))

dh := dockerDriverHarness(t, nil)
driver := dh.Impl().(*Driver)

c, err := driver.createContainerConfig(task, cfg, "org/repo:0.1")
require.NoError(t, err)

expectedLabels := map[string]string{
// user provided labels
"user_label": "user_value",
// default labels
"com.hashicorp.nomad.alloc_id": task.AllocID,
"com.hashicorp.nomad.job_name": "redis-demo-job",
"com.hashicorp.nomad.task_id": task.ID,
"com.hashicorp.nomad.task_name": "redis-demo",
}

require.Equal(t, expectedLabels, c.Config.Labels)
}

func TestDockerDriver_CreateContainerConfig_Logging(t *testing.T) {
t.Parallel()

Expand Down
Loading