Skip to content

Commit

Permalink
client: add NetworkStatus to Allocation (hashicorp#8657)
Browse files Browse the repository at this point in the history
  • Loading branch information
nickethier authored and fredrikhgrelland committed Oct 22, 2020
1 parent 3c6b7c0 commit 6c4f7c3
Show file tree
Hide file tree
Showing 22 changed files with 520 additions and 43 deletions.
42 changes: 42 additions & 0 deletions client/allocrunner/alloc_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,8 +386,14 @@ func (ar *allocRunner) Restore() error {
return err
}

ns, err := ar.stateDB.GetNetworkStatus(ar.id)
if err != nil {
return err
}

ar.stateLock.Lock()
ar.state.DeploymentStatus = ds
ar.state.NetworkStatus = ns
ar.stateLock.Unlock()

states := make(map[string]*structs.TaskState)
Expand Down Expand Up @@ -655,6 +661,22 @@ func (ar *allocRunner) clientAlloc(taskStates map[string]*structs.TaskState) *st
}
}

// Set the NetworkStatus and default DNSConfig if one is not returned from the client
netStatus := ar.state.NetworkStatus
if netStatus != nil {
a.NetworkStatus = netStatus
} else {
a.NetworkStatus = new(structs.AllocNetworkStatus)
}

if a.NetworkStatus.DNS == nil {
alloc := ar.Alloc()
nws := alloc.Job.LookupTaskGroup(alloc.TaskGroup).Networks
if len(nws) > 0 {
a.NetworkStatus.DNS = nws[0].DNS.Copy()
}
}

return a
}

Expand Down Expand Up @@ -700,6 +722,12 @@ func (ar *allocRunner) SetClientStatus(clientStatus string) {
ar.state.ClientStatus = clientStatus
}

func (ar *allocRunner) SetNetworkStatus(s *structs.AllocNetworkStatus) {
ar.stateLock.Lock()
defer ar.stateLock.Unlock()
ar.state.NetworkStatus = s.Copy()
}

// AllocState returns a copy of allocation state including a snapshot of task
// states.
func (ar *allocRunner) AllocState() *state.State {
Expand Down Expand Up @@ -855,6 +883,20 @@ func (ar *allocRunner) PersistState() error {
return nil
}

// persist network status, wrapping in a func to release state lock as early as possible
if err := func() error {
ar.stateLock.Lock()
defer ar.stateLock.Unlock()
if ar.state.NetworkStatus != nil {
if err := ar.stateDB.PutNetworkStatus(ar.id, ar.state.NetworkStatus); err != nil {
return err
}
}
return nil
}(); err != nil {
return err
}

// TODO: consider persisting deployment state along with task status.
// While we study why only the alloc is persisted, I opted to maintain current
// behavior and not risk adding yet more IO calls unnecessarily.
Expand Down
20 changes: 1 addition & 19 deletions client/allocrunner/alloc_runner_hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
cstructs "github.com/hashicorp/nomad/client/structs"
"github.com/hashicorp/nomad/client/taskenv"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/plugins/drivers"
)

type hookResourceSetter interface {
Expand Down Expand Up @@ -42,23 +41,6 @@ func (a *allocHookResourceSetter) SetAllocHookResources(res *cstructs.AllocHookR
}
}

type networkIsolationSetter interface {
SetNetworkIsolation(*drivers.NetworkIsolationSpec)
}

// allocNetworkIsolationSetter is a shim to allow the alloc network hook to
// set the alloc network isolation configuration without full access
// to the alloc runner
type allocNetworkIsolationSetter struct {
ar *allocRunner
}

func (a *allocNetworkIsolationSetter) SetNetworkIsolation(n *drivers.NetworkIsolationSpec) {
for _, tr := range a.ar.tasks {
tr.SetNetworkIsolation(n)
}
}

// allocHealthSetter is a shim to allow the alloc health watcher hook to set
// and clear the alloc health without full access to the alloc runner state
type allocHealthSetter struct {
Expand Down Expand Up @@ -159,7 +141,7 @@ func (ar *allocRunner) initRunnerHooks(config *clientconfig.Config) error {
newUpstreamAllocsHook(hookLogger, ar.prevAllocWatcher),
newDiskMigrationHook(hookLogger, ar.prevAllocMigrator, ar.allocDir),
newAllocHealthWatcherHook(hookLogger, alloc, hs, ar.Listener(), ar.consulClient),
newNetworkHook(hookLogger, ns, alloc, nm, nc),
newNetworkHook(hookLogger, ns, alloc, nm, nc, ar),
newGroupServiceHook(groupServiceHookConfig{
alloc: alloc,
consul: ar.consulClient,
Expand Down
2 changes: 1 addition & 1 deletion client/allocrunner/interfaces/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ type TaskStateHandler interface {
TaskStateUpdated()
}

// AllocStatsReporter gives acess to the latest resource usage from the
// AllocStatsReporter gives access to the latest resource usage from the
// allocation
type AllocStatsReporter interface {
LatestAllocStats(taskFilter string) (*cstructs.AllocResourceUsage, error)
Expand Down
48 changes: 40 additions & 8 deletions client/allocrunner/network_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,37 @@ import (
"github.com/hashicorp/nomad/plugins/drivers"
)

type networkIsolationSetter interface {
SetNetworkIsolation(*drivers.NetworkIsolationSpec)
}

// allocNetworkIsolationSetter is a shim to allow the alloc network hook to
// set the alloc network isolation configuration without full access
// to the alloc runner
type allocNetworkIsolationSetter struct {
ar *allocRunner
}

func (a *allocNetworkIsolationSetter) SetNetworkIsolation(n *drivers.NetworkIsolationSpec) {
for _, tr := range a.ar.tasks {
tr.SetNetworkIsolation(n)
}
}

type networkStatusSetter interface {
SetNetworkStatus(*structs.AllocNetworkStatus)
}

// networkHook is an alloc lifecycle hook that manages the network namespace
// for an alloc
type networkHook struct {
// setter is a callback to set the network isolation spec when after the
// isolationSetter is a callback to set the network isolation spec when after the
// network is created
setter networkIsolationSetter
isolationSetter networkIsolationSetter

// statusSetter is a callback to the alloc runner to set the network status once
// network setup is complete
networkStatusSetter networkStatusSetter

// manager is used when creating the network namespace. This defaults to
// bind mounting a network namespace descritor under /var/run/netns but
Expand All @@ -34,11 +59,15 @@ type networkHook struct {
logger hclog.Logger
}

func newNetworkHook(logger hclog.Logger, ns networkIsolationSetter,
alloc *structs.Allocation, netManager drivers.DriverNetworkManager,
netConfigurator NetworkConfigurator) *networkHook {
func newNetworkHook(logger hclog.Logger,
ns networkIsolationSetter,
alloc *structs.Allocation,
netManager drivers.DriverNetworkManager,
netConfigurator NetworkConfigurator,
networkStatusSetter networkStatusSetter) *networkHook {
return &networkHook{
setter: ns,
isolationSetter: ns,
networkStatusSetter: networkStatusSetter,
alloc: alloc,
manager: netManager,
networkConfigurator: netConfigurator,
Expand Down Expand Up @@ -69,13 +98,16 @@ func (h *networkHook) Prerun() error {

if spec != nil {
h.spec = spec
h.setter.SetNetworkIsolation(spec)
h.isolationSetter.SetNetworkIsolation(spec)
}

if created {
if err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec); err != nil {
status, err := h.networkConfigurator.Setup(context.TODO(), h.alloc, spec)
if err != nil {
return fmt.Errorf("failed to configure networking for alloc: %v", err)
}

h.networkStatusSetter.SetNetworkStatus(status)
}
return nil
}
Expand Down
19 changes: 17 additions & 2 deletions client/allocrunner/network_hook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ func (m *mockNetworkIsolationSetter) SetNetworkIsolation(spec *drivers.NetworkIs
require.Exactly(m.t, m.expectedSpec, spec)
}

type mockNetworkStatusSetter struct {
t *testing.T
expectedStatus *structs.AllocNetworkStatus
called bool
}

func (m *mockNetworkStatusSetter) SetNetworkStatus(status *structs.AllocNetworkStatus) {
m.called = true
require.Exactly(m.t, m.expectedStatus, status)
}

// Test that the prerun and postrun hooks call the setter with the expected spec when
// the network mode is not host
func TestNetworkHook_Prerun_Postrun(t *testing.T) {
Expand Down Expand Up @@ -62,10 +73,14 @@ func TestNetworkHook_Prerun_Postrun(t *testing.T) {
t: t,
expectedSpec: spec,
}
statusSetter := &mockNetworkStatusSetter{
t: t,
expectedStatus: nil,
}
require := require.New(t)

logger := testlog.HCLogger(t)
hook := newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{})
hook := newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter)
require.NoError(hook.Prerun())
require.True(setter.called)
require.False(destroyCalled)
Expand All @@ -76,7 +91,7 @@ func TestNetworkHook_Prerun_Postrun(t *testing.T) {
setter.called = false
destroyCalled = false
alloc.Job.TaskGroups[0].Networks[0].Mode = "host"
hook = newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{})
hook = newNetworkHook(logger, setter, alloc, nm, &hostNetworkConfigurator{}, statusSetter)
require.NoError(hook.Prerun())
require.False(setter.called)
require.False(destroyCalled)
Expand Down
6 changes: 3 additions & 3 deletions client/allocrunner/networking.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
// NetworkConfigurator sets up and tears down the interfaces, routes, firewall
// rules, etc for the configured networking mode of the allocation.
type NetworkConfigurator interface {
Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error
Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error)
Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error
}

Expand All @@ -19,8 +19,8 @@ type NetworkConfigurator interface {
// require further configuration
type hostNetworkConfigurator struct{}

func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error {
return nil
func (h *hostNetworkConfigurator) Setup(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
return nil, nil
}
func (h *hostNetworkConfigurator) Teardown(context.Context, *structs.Allocation, *drivers.NetworkIsolationSpec) error {
return nil
Expand Down
4 changes: 2 additions & 2 deletions client/allocrunner/networking_bridge_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ func (b *bridgeNetworkConfigurator) generateAdminChainRule() []string {
}

// Setup calls the CNI plugins with the add action
func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) error {
func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
if err := b.ensureForwardingRules(); err != nil {
return fmt.Errorf("failed to initialize table forwarding rules: %v", err)
return nil, fmt.Errorf("failed to initialize table forwarding rules: %v", err)
}

return b.cni.Setup(ctx, alloc, spec)
Expand Down
36 changes: 30 additions & 6 deletions client/allocrunner/networking_cni.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,25 +78,26 @@ func newCNINetworkConfiguratorWithConf(logger log.Logger, cniPath, cniInterfaceP
}

// Setup calls the CNI plugins with the add action
func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) error {
func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) (*structs.AllocNetworkStatus, error) {
if err := c.ensureCNIInitialized(); err != nil {
return err
return nil, err
}

// Depending on the version of bridge cni plugin used, a known race could occure
// where two alloc attempt to create the nomad bridge at the same time, resulting
// in one of them to fail. This rety attempts to overcome those erroneous failures.
const retry = 3
var firstError error
var res *cni.CNIResult
for attempt := 1; ; attempt++ {
//TODO eventually returning the IP from the result would be nice to have in the alloc
if _, err := c.cni.Setup(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))); err != nil {
var err error
if res, err = c.cni.Setup(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc, c.ignorePortMappingHostIP))); err != nil {
c.logger.Warn("failed to configure network", "err", err, "attempt", attempt)
switch attempt {
case 1:
firstError = err
case retry:
return fmt.Errorf("failed to configure network: %v", firstError)
return nil, fmt.Errorf("failed to configure network: %v", firstError)
}

// Sleep for 1 second + jitter
Expand All @@ -106,7 +107,30 @@ func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Alloc
break
}

return nil
netStatus := new(structs.AllocNetworkStatus)

if len(res.Interfaces) > 0 {
iface, name := func(r *cni.CNIResult) (*cni.Config, string) {
for i := range r.Interfaces {
return r.Interfaces[i], i
}
return nil, ""
}(res)

netStatus.InterfaceName = name
if len(iface.IPConfigs) > 0 {
netStatus.Address = iface.IPConfigs[0].IP.String()
}
}
if len(res.DNS) > 0 {
netStatus.DNS = &structs.DNSConfig{
Servers: res.DNS[0].Nameservers,
Searches: res.DNS[0].Search,
Options: res.DNS[0].Options,
}
}

return netStatus, nil

}

Expand Down
4 changes: 4 additions & 0 deletions client/allocrunner/state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ type State struct {

// TaskStates is a snapshot of task states.
TaskStates map[string]*structs.TaskState

// NetworkStatus captures network details not known until runtime
NetworkStatus *structs.AllocNetworkStatus
}

// SetDeploymentStatus is a helper for updating the client-controlled
Expand Down Expand Up @@ -57,6 +60,7 @@ func (s *State) Copy() *State {
ClientDescription: s.ClientDescription,
DeploymentStatus: s.DeploymentStatus.Copy(),
TaskStates: taskStates,
NetworkStatus: s.NetworkStatus.Copy(),
}
}

Expand Down
1 change: 1 addition & 0 deletions client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -1887,6 +1887,7 @@ func (c *Client) AllocStateUpdated(alloc *structs.Allocation) {
stripped.ClientStatus = alloc.ClientStatus
stripped.ClientDescription = alloc.ClientDescription
stripped.DeploymentStatus = alloc.DeploymentStatus
stripped.NetworkStatus = alloc.NetworkStatus

select {
case c.allocUpdates <- stripped:
Expand Down
8 changes: 8 additions & 0 deletions client/state/errdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ func (m *ErrDB) PutDeploymentStatus(allocID string, ds *structs.AllocDeploymentS
return fmt.Errorf("Error!")
}

func (m *ErrDB) GetNetworkStatus(allocID string) (*structs.AllocNetworkStatus, error) {
return nil, fmt.Errorf("Error!")
}

func (m *ErrDB) PutNetworkStatus(allocID string, ns *structs.AllocNetworkStatus) error {
return fmt.Errorf("Error!")
}

func (m *ErrDB) GetTaskRunnerState(allocID string, taskName string) (*state.LocalState, *structs.TaskState, error) {
return nil, nil, fmt.Errorf("Error!")
}
Expand Down
5 changes: 5 additions & 0 deletions client/state/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ type StateDB interface {
GetDeploymentStatus(allocID string) (*structs.AllocDeploymentStatus, error)
PutDeploymentStatus(allocID string, ds *structs.AllocDeploymentStatus) error

// Get/Put NetworkStatus get and put the allocation's network
// status. It may be nil.
GetNetworkStatus(allocID string) (*structs.AllocNetworkStatus, error)
PutNetworkStatus(allocID string, ns *structs.AllocNetworkStatus) error

// GetTaskRunnerState returns the LocalState and TaskState for a
// TaskRunner. Either state may be nil if it is not found, but if an
// error is encountered only the error will be non-nil.
Expand Down
Loading

0 comments on commit 6c4f7c3

Please sign in to comment.