diff --git a/CHANGELOG.md b/CHANGELOG.md index fcebeb16dac2..91b2677fb86c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,7 @@ BUG FIXES: * csi: Fixed a bug where `nomad volume detach` would not accept prefixes for the node ID parameter. [[GH-9041](https://github.com/hashicorp/nomad/issues/9041)] * csi: Fixed a bug where `nomad alloc status -verbose` would display an error when querying volumes. [[GH-9354](https://github.com/hashicorp/nomad/issues/9354)] * csi: Fixed a bug where queries for CSI plugins could be interleaved, resulting in inconsistent counts of plugins. [[GH-9438](https://github.com/hashicorp/nomad/issues/9438)] + * driver/docker: Fixed a bug where the Docker daemon could block longer than the `kill_timeout`. [[GH-9502](https://github.com/hashicorp/nomad/issues/9502) * driver/docker: Fixed a bug where the default `image_delay` configuration was ignored if the `gc` configuration was not set. [[GH-9101](https://github.com/hashicorp/nomad/issues/9101)] * driver/raw_exec: Fixed a bug where raw_exec attempts to create a freezer cgroups for the tasks even when `no_cgroups` is set. [[GH-9328](https://github.com/hashicorp/nomad/issues/9328)] * ui: Fixed a bug in the volume status page where read allocations and write allocations were not displayed. [[GH-9377](https://github.com/hashicorp/nomad/issues/9377)] diff --git a/drivers/docker/driver.go b/drivers/docker/driver.go index b5677911d9d1..d8222a087a3d 100644 --- a/drivers/docker/driver.go +++ b/drivers/docker/driver.go @@ -1461,7 +1461,10 @@ func (d *Driver) SignalTask(taskID string, signal string) error { return fmt.Errorf("failed to parse signal: %v", err) } - return h.Signal(sig) + // TODO: review whether we can timeout in this and other Docker API + // calls without breaking the expected client behavior. + // see https://github.com/hashicorp/nomad/issues/9503 + return h.Signal(context.Background(), sig) } func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { diff --git a/drivers/docker/handle.go b/drivers/docker/handle.go index 3e1854a76572..6a75603f79f6 100644 --- a/drivers/docker/handle.go +++ b/drivers/docker/handle.go @@ -103,7 +103,7 @@ func (h *taskHandle) Exec(ctx context.Context, cmd string, args []string) (*driv return execResult, nil } -func (h *taskHandle) Signal(s os.Signal) error { +func (h *taskHandle) Signal(ctx context.Context, s os.Signal) error { // Convert types sysSig, ok := s.(syscall.Signal) if !ok { @@ -116,8 +116,9 @@ func (h *taskHandle) Signal(s os.Signal) error { dockerSignal := docker.Signal(sysSig) opts := docker.KillContainerOptions{ - ID: h.containerID, - Signal: dockerSignal, + ID: h.containerID, + Signal: dockerSignal, + Context: ctx, } return h.client.KillContainer(opts) @@ -127,7 +128,10 @@ func (h *taskHandle) Signal(s os.Signal) error { func (h *taskHandle) Kill(killTimeout time.Duration, signal os.Signal) error { // Only send signal if killTimeout is set, otherwise stop container if killTimeout > 0 { - if err := h.Signal(signal); err != nil { + ctx, cancel := context.WithTimeout(context.Background(), killTimeout) + defer cancel() + + if err := h.Signal(ctx, signal); err != nil { // Container has already been removed. if strings.Contains(err.Error(), NoSuchContainerError) { h.logger.Debug("attempted to signal nonexistent container") @@ -146,7 +150,7 @@ func (h *taskHandle) Kill(killTimeout time.Duration, signal os.Signal) error { select { case <-h.waitCh: return nil - case <-time.After(killTimeout): + case <-ctx.Done(): } }