Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

services: un-mark group services as deregistered if restart hook runs #16905

Merged
merged 4 commits into from
Apr 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/16905.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
services: Fixed a bug preventing group service deregistrations after alloc restarts
```
8 changes: 6 additions & 2 deletions client/allocrunner/group_service_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,14 @@ func (h *groupServiceHook) Prerun() error {
defer func() {
// Mark prerun as true to unblock Updates
h.prerun = true
// Mark deregistered as false to allow de-registration
h.deregistered = false
h.mu.Unlock()
}()
return h.preRunLocked()
}

// caller must hold h.lock
// caller must hold h.mu
func (h *groupServiceHook) preRunLocked() error {
if len(h.services) == 0 {
return nil
Expand Down Expand Up @@ -188,6 +190,8 @@ func (h *groupServiceHook) PreTaskRestart() error {
defer func() {
// Mark prerun as true to unblock Updates
h.prerun = true
// Mark deregistered as false to allow de-registration
h.deregistered = false
h.mu.Unlock()
}()

Expand All @@ -201,7 +205,7 @@ func (h *groupServiceHook) PreKill() {

// implements the PreKill hook
//
// caller must hold h.lock
// caller must hold h.mu
func (h *groupServiceHook) preKillLocked() {
// If we have a shutdown delay deregister group services and then wait
// before continuing to kill tasks.
Expand Down
74 changes: 74 additions & 0 deletions e2e/consul/alloc_restart_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package consul

import (
"context"
"errors"
"testing"
"time"

"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/shoenig/test/must"
"github.com/shoenig/test/wait"
)

func testAllocRestart(t *testing.T) {
nc := e2eutil.NomadClient(t)
cc := e2eutil.ConsulClient(t).Catalog()

const jobFile = "./input/alloc_restart.hcl"
jobID := "alloc-restart-" + uuid.Short()
jobIDs := []string{jobID}

ctx, cancel := context.WithCancel(context.Background())
defer cancel()
defer e2eutil.CleanupJobsAndGCWithContext(t, ctx, &jobIDs)

// register our job
err := e2eutil.Register(jobID, jobFile)
must.NoError(t, err)

// wait for our allocation to be running
allocID := e2eutil.SingleAllocID(t, jobID, "", 0)
e2eutil.WaitForAllocRunning(t, nc, allocID)

// make sure our service is registered
services, _, err := cc.Service("alloc-restart-http", "", nil)
must.NoError(t, err)
must.Len(t, 1, services)

// restart the alloc
stderr, err := e2eutil.Command("nomad", "alloc", "restart", allocID)
must.NoError(t, err, must.Sprintf("stderr: %s", stderr))

// wait for alloc running again
e2eutil.WaitForAllocRunning(t, nc, allocID)

// make sure our service is still registered
services, _, err = cc.Service("alloc-restart-http", "", nil)
must.NoError(t, err)
must.Len(t, 1, services)

err = e2eutil.StopJob(jobID)
must.NoError(t, err)

// make sure our service is no longer registered
f := func() error {
services, _, err = cc.Service("alloc-restart-http", "", nil)
if err != nil {
return err
}
if len(services) != 0 {
return errors.New("expected empty services")
}
return nil
}
must.Wait(t, wait.InitialSuccess(
wait.ErrorFunc(f),
wait.Timeout(10*time.Second),
wait.Gap(1*time.Second),
))
}
19 changes: 19 additions & 0 deletions e2e/consul/consul_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package consul

import (
"testing"

"github.com/hashicorp/nomad/e2e/e2eutil"
)

func TestConsul(t *testing.T) {
// todo: migrate the remaining consul tests

nomad := e2eutil.NomadClient(t)

e2eutil.WaitForLeader(t, nomad)
e2eutil.WaitForNodesReady(t, nomad, 1)

t.Run("testServiceReversion", testServiceReversion)
t.Run("testAllocRestart", testAllocRestart)
}
33 changes: 33 additions & 0 deletions e2e/consul/input/alloc_restart.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
job "alloc-restart" {
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "group" {
network {
port "http" {
to = 8080
}
}

service {
name = "alloc-restart-http"
port = "http"
}

task "python" {
driver = "raw_exec"

config {
command = "python3"
args = ["-m", "http.server", "8080", "--directory", "/tmp"]
}

resources {
cpu = 16
memory = 32
}
}
}
}
11 changes: 0 additions & 11 deletions e2e/consul/service_revert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,6 @@ import (
"github.com/shoenig/test/must"
)

func TestConsul(t *testing.T) {
// todo: migrate the remaining consul tests

nomad := e2eutil.NomadClient(t)

e2eutil.WaitForLeader(t, nomad)
e2eutil.WaitForNodesReady(t, nomad, 1)

t.Run("testServiceReversion", testServiceReversion)
}

// testServiceReversion asserts we can
// - submit a job with a service
// - update that job and modify service
Expand Down