Skip to content

Commit

Permalink
services: un-mark group services as deregistered if restart hook runs (
Browse files Browse the repository at this point in the history
…#16905)

* services: un-mark group services as deregistered if restart hook runs

This PR may fix a bug where group services will never be deregistered if the
group undergoes a task restart.

* e2e: add test case for restart and deregister group service

* cl: add cl

* e2e: add wait for service list call
  • Loading branch information
shoenig committed Apr 24, 2023
1 parent 93d1744 commit 9d833dc
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 13 deletions.
3 changes: 3 additions & 0 deletions .changelog/16905.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
services: Fixed a bug preventing group service deregistrations after alloc restarts
```
8 changes: 6 additions & 2 deletions client/allocrunner/groupservice_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,14 @@ func (h *groupServiceHook) Prerun() error {
defer func() {
// Mark prerun as true to unblock Updates
h.prerun = true
// Mark deregistered as false to allow de-registration
h.deregistered = false
h.mu.Unlock()
}()
return h.preRunLocked()
}

// caller must hold h.lock
// caller must hold h.mu
func (h *groupServiceHook) preRunLocked() error {
if len(h.services) == 0 {
return nil
Expand Down Expand Up @@ -188,6 +190,8 @@ func (h *groupServiceHook) PreTaskRestart() error {
defer func() {
// Mark prerun as true to unblock Updates
h.prerun = true
// Mark deregistered as false to allow de-registration
h.deregistered = false
h.mu.Unlock()
}()

Expand All @@ -201,7 +205,7 @@ func (h *groupServiceHook) PreKill() {

// implements the PreKill hook
//
// caller must hold h.lock
// caller must hold h.mu
func (h *groupServiceHook) preKillLocked() {
// If we have a shutdown delay deregister group services and then wait
// before continuing to kill tasks.
Expand Down
74 changes: 74 additions & 0 deletions e2e/consul/alloc_restart_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package consul

import (
"context"
"errors"
"testing"
"time"

"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/shoenig/test/must"
"github.com/shoenig/test/wait"
)

func testAllocRestart(t *testing.T) {
nc := e2eutil.NomadClient(t)
cc := e2eutil.ConsulClient(t).Catalog()

const jobFile = "./input/alloc_restart.hcl"
jobID := "alloc-restart-" + uuid.Short()
jobIDs := []string{jobID}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
defer e2eutil.CleanupJobsAndGCWithContext(t, ctx, &jobIDs)

// register our job
err := e2eutil.Register(jobID, jobFile)
must.NoError(t, err)

// wait for our allocation to be running
allocID := e2eutil.SingleAllocID(t, jobID, "", 0)
e2eutil.WaitForAllocRunning(t, nc, allocID)

// make sure our service is registered
services, _, err := cc.Service("alloc-restart-http", "", nil)
must.NoError(t, err)
must.Len(t, 1, services)

// restart the alloc
stderr, err := e2eutil.Command("nomad", "alloc", "restart", allocID)
must.NoError(t, err, must.Sprintf("stderr: %s", stderr))

// wait for alloc running again
e2eutil.WaitForAllocRunning(t, nc, allocID)

// make sure our service is still registered
services, _, err = cc.Service("alloc-restart-http", "", nil)
must.NoError(t, err)
must.Len(t, 1, services)

err = e2eutil.StopJob(jobID)
must.NoError(t, err)

// make sure our service is no longer registered
f := func() error {
services, _, err = cc.Service("alloc-restart-http", "", nil)
if err != nil {
return err
}
if len(services) != 0 {
return errors.New("expected empty services")
}
return nil
}
must.Wait(t, wait.InitialSuccess(
wait.ErrorFunc(f),
wait.Timeout(10*time.Second),
wait.Gap(1*time.Second),
))
}
19 changes: 19 additions & 0 deletions e2e/consul/consul_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package consul

import (
"testing"

"github.com/hashicorp/nomad/e2e/e2eutil"
)

func TestConsul(t *testing.T) {
// todo: migrate the remaining consul tests

nomad := e2eutil.NomadClient(t)

e2eutil.WaitForLeader(t, nomad)
e2eutil.WaitForNodesReady(t, nomad, 1)

t.Run("testServiceReversion", testServiceReversion)
t.Run("testAllocRestart", testAllocRestart)
}
33 changes: 33 additions & 0 deletions e2e/consul/input/alloc_restart.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
job "alloc-restart" {
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "group" {
network {
port "http" {
to = 8080
}
}

service {
name = "alloc-restart-http"
port = "http"
}

task "python" {
driver = "raw_exec"

config {
command = "python3"
args = ["-m", "http.server", "8080", "--directory", "/tmp"]
}

resources {
cpu = 16
memory = 32
}
}
}
}
11 changes: 0 additions & 11 deletions e2e/consul/service_revert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,6 @@ import (
"github.com/shoenig/test/must"
)

func TestConsul(t *testing.T) {
// todo: migrate the remaining consul tests

nomad := e2eutil.NomadClient(t)

e2eutil.WaitForLeader(t, nomad)
e2eutil.WaitForNodesReady(t, nomad, 1)

t.Run("testServiceReversion", testServiceReversion)
}

// testServiceReversion asserts we can
// - submit a job with a service
// - update that job and modify service
Expand Down

0 comments on commit 9d833dc

Please sign in to comment.