From 90653b364845291665517c2b0acc54ee2a50cd73 Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Fri, 6 Mar 2020 21:51:22 -0500 Subject: [PATCH 1/8] client: add CNI client fingerprinting --- client/config/config.go | 4 ++ client/fingerprint/cni.go | 73 +++++++++++++++++++++++++++++++ client/fingerprint/fingerprint.go | 8 ++++ client/fingerprint_manager.go | 18 ++++++++ 4 files changed, 103 insertions(+) create mode 100644 client/fingerprint/cni.go diff --git a/client/config/config.go b/client/config/config.go index d1de0d2eaa82..adb263818a03 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -234,6 +234,10 @@ type Config struct { // be specified with colon delimited CNIPath string + // CNIConfigDir is the directory where CNI network configuration is located. The + // client will use this path when fingerprinting CNI networks. + CNIConfigDir string + // BridgeNetworkName is the name to use for the bridge created in bridge // networking mode. This defaults to 'nomad' if not set BridgeNetworkName string diff --git a/client/fingerprint/cni.go b/client/fingerprint/cni.go new file mode 100644 index 000000000000..7e2825e9c779 --- /dev/null +++ b/client/fingerprint/cni.go @@ -0,0 +1,73 @@ +package fingerprint + +import ( + "fmt" + "os" + "strings" + + "github.com/containernetworking/cni/libcni" + log "github.com/hashicorp/go-hclog" +) + +type CNIFingerprint struct { + StaticFingerprinter + logger log.Logger + networks map[string]struct{} +} + +func NewCNIFingerprint(logger log.Logger) Fingerprint { + return &CNIFingerprint{logger: logger, networks: make(map[string]struct{})} +} + +func (f *CNIFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error { + confDir := req.Config.CNIConfigDir + networks := map[string]struct{}{} + if _, err := os.Stat(confDir); os.IsNotExist(err) { + f.logger.Debug("CNI config dir is not set or does not exist, skipping", "cni_config_dir", confDir) + resp.Detected = false + return nil + } + + files, err := libcni.ConfFiles(confDir, []string{".conf", ".conflist", ".json"}) + if err != nil { + return fmt.Errorf("failed to read cni conf files: %v", err) + } + + for _, confFile := range files { + if strings.HasSuffix(confFile, ".conflist") { + confList, err := libcni.ConfListFromFile(confFile) + if err != nil { + return fmt.Errorf("failed to load CNI config list file %s: %v", confFile, err) + } + if _, ok := networks[confList.Name]; ok { + f.logger.Warn("multiple CNI config names found, ignoring file", "name", confList.Name, "file", confFile) + continue + } + networks[confList.Name] = struct{}{} + } else { + conf, err := libcni.ConfFromFile(confFile) + if err != nil { + return fmt.Errorf("failed to load CNI config file %s: %v", confFile, err) + } + if _, ok := networks[conf.Network.Name]; ok { + f.logger.Warn("multiple CNI config names found, ignoring file", "name", conf.Network.Name, "file", confFile) + continue + } + networks[conf.Network.Name] = struct{}{} + } + } + + for name := range networks { + resp.AddAttribute(fmt.Sprintf("cni.network.%s", name), "1") + } + for name := range f.networks { + if _, ok := networks[name]; !ok { + resp.RemoveAttribute(fmt.Sprintf("cni.network.%s", name)) + } + } + f.networks = networks + resp.Detected = true + return nil +} + +func (f *CNIFingerprint) Reload() {} diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index f7d0e2279b70..c41fcbf259f0 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -31,6 +31,7 @@ var ( hostFingerprinters = map[string]Factory{ "arch": NewArchFingerprint, "consul": NewConsulFingerprint, + "cni": NewCNIFingerprint, "cpu": NewCPUFingerprint, "host": NewHostFingerprint, "memory": NewMemoryFingerprint, @@ -115,6 +116,13 @@ type Fingerprint interface { Periodic() (bool, time.Duration) } +// ReloadableFingerprint can be implemented if the fingerprinter needs to be run during client reload. +// If implemented, the client will call Reload during client reload then immediatly Fingerprint +type ReloadableFingerprint interface { + Fingerprint + Reload() +} + // StaticFingerprinter can be embedded in a struct that has a Fingerprint method // to make it non-periodic. type StaticFingerprinter struct{} diff --git a/client/fingerprint_manager.go b/client/fingerprint_manager.go index 84efed5bcb38..eff1a01f9d1c 100644 --- a/client/fingerprint_manager.go +++ b/client/fingerprint_manager.go @@ -24,6 +24,8 @@ type FingerprintManager struct { // associated node updateNodeAttributes func(*fingerprint.FingerprintResponse) *structs.Node + reloadableFps map[string]fingerprint.ReloadableFingerprint + logger log.Logger } @@ -44,6 +46,7 @@ func NewFingerprintManager( node: node, shutdownCh: shutdownCh, logger: logger.Named("fingerprint_mgr"), + reloadableFps: make(map[string]fingerprint.ReloadableFingerprint), } } @@ -103,6 +106,17 @@ func (fp *FingerprintManager) Run() error { return nil } +// Reload will reload any registered ReloadableFingerprinters and immediatly call Fingerprint +func (fm *FingerprintManager) Reload() { + for name, fp := range fm.reloadableFps { + fm.logger.Info("reloading fingerprinter", "fingerprinter", name) + fp.Reload() + if _, err := fm.fingerprint(name, fp); err != nil { + fm.logger.Warn("error fingerprinting after reload", "fingerprinter", name, "error", err) + } + } +} + // setupFingerprints is used to fingerprint the node to see if these attributes are // supported func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error { @@ -130,6 +144,10 @@ func (fm *FingerprintManager) setupFingerprinters(fingerprints []string) error { if p { go fm.runFingerprint(f, period, name) } + + if rfp, ok := f.(fingerprint.ReloadableFingerprint); ok { + fm.reloadableFps[name] = rfp + } } fm.logger.Debug("detected fingerprints", "node_attrs", appliedFingerprints) From 07d46acb4bfb843098c01330f99e52004666a671 Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Fri, 20 Mar 2020 20:25:06 -0400 Subject: [PATCH 2/8] cni: use NodeResources.Networks for fingerprinted CNI networks --- client/config/config.go | 1 + client/fingerprint/cni.go | 24 +++--- client/fingerprint/cni_test.go | 79 +++++++++++++++++++ .../fingerprint/test_fixtures/cni/net1.conf | 17 ++++ .../test_fixtures/cni/net2.conflist | 26 ++++++ nomad/structs/structs.go | 51 +++--------- 6 files changed, 147 insertions(+), 51 deletions(-) create mode 100644 client/fingerprint/cni_test.go create mode 100644 client/fingerprint/test_fixtures/cni/net1.conf create mode 100644 client/fingerprint/test_fixtures/cni/net2.conflist diff --git a/client/config/config.go b/client/config/config.go index adb263818a03..0dfca7878e59 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -305,6 +305,7 @@ func DefaultConfig() *Config { }, BackwardsCompatibleMetrics: false, RPCHoldTimeout: 5 * time.Second, + CNIConfigDir: "/opt/cni/config", } } diff --git a/client/fingerprint/cni.go b/client/fingerprint/cni.go index 7e2825e9c779..682c029d4bda 100644 --- a/client/fingerprint/cni.go +++ b/client/fingerprint/cni.go @@ -7,16 +7,16 @@ import ( "github.com/containernetworking/cni/libcni" log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/nomad/structs" ) type CNIFingerprint struct { StaticFingerprinter - logger log.Logger - networks map[string]struct{} + logger log.Logger } func NewCNIFingerprint(logger log.Logger) Fingerprint { - return &CNIFingerprint{logger: logger, networks: make(map[string]struct{})} + return &CNIFingerprint{logger: logger} } func (f *CNIFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error { @@ -57,17 +57,19 @@ func (f *CNIFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintR } } + var nodeNetworks structs.Networks + for name := range networks { - resp.AddAttribute(fmt.Sprintf("cni.network.%s", name), "1") + nodeNetworks = append(nodeNetworks, &structs.NetworkResource{ + Mode: fmt.Sprintf("cni/%s", name), + }) + f.logger.Debug("detected CNI network", "name", name) } - for name := range f.networks { - if _, ok := networks[name]; !ok { - resp.RemoveAttribute(fmt.Sprintf("cni.network.%s", name)) - } + + resp.NodeResources = &structs.NodeResources{ + Networks: nodeNetworks, } - f.networks = networks + resp.Detected = true return nil } - -func (f *CNIFingerprint) Reload() {} diff --git a/client/fingerprint/cni_test.go b/client/fingerprint/cni_test.go new file mode 100644 index 000000000000..d89a930b27a2 --- /dev/null +++ b/client/fingerprint/cni_test.go @@ -0,0 +1,79 @@ +package fingerprint + +import ( + "testing" + + "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/stretchr/testify/require" +) + +func TestCNIFingerprint(t *testing.T) { + cases := []struct { + name string + req *FingerprintRequest + exp *FingerprintResponse + err bool + errMatch string + }{ + { + name: "cni config dir not set", + req: &FingerprintRequest{ + Config: &config.Config{}, + }, + exp: &FingerprintResponse{ + Detected: false, + }, + }, + { + name: "cni config dir non-existent", + req: &FingerprintRequest{ + Config: &config.Config{ + CNIConfigDir: "text_fixtures/cni_nonexistent", + }, + }, + exp: &FingerprintResponse{ + Detected: false, + }, + }, + { + name: "two networks, no errors", + req: &FingerprintRequest{ + Config: &config.Config{ + CNIConfigDir: "test_fixtures/cni", + }, + }, + exp: &FingerprintResponse{ + NodeResources: &structs.NodeResources{ + Networks: []*structs.NetworkResource{ + { + Mode: "cni/net1", + }, + { + Mode: "cni/net2", + }, + }, + }, + Detected: true, + }, + err: false, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + require := require.New(t) + fp := NewCNIFingerprint(testlog.HCLogger(t)) + resp := &FingerprintResponse{} + err := fp.Fingerprint(c.req, resp) + if c.err { + require.Error(err) + require.Contains(err.Error(), c.errMatch) + } else { + require.NoError(err) + require.Exactly(c.exp, resp) + } + }) + } +} diff --git a/client/fingerprint/test_fixtures/cni/net1.conf b/client/fingerprint/test_fixtures/cni/net1.conf new file mode 100644 index 000000000000..ea5dddb63d8e --- /dev/null +++ b/client/fingerprint/test_fixtures/cni/net1.conf @@ -0,0 +1,17 @@ +{ + "cniVersion": "0.2.0", + "name": "net1", + "type": "bridge", + "bridge": "cni0", + "isGateway": true, + "ipMasq": true, + "ipam": { + "type": "host-local", + "subnet": "10.22.0.0/16", + "routes": [ + { + "dst": "0.0.0.0/0" + } + ] + } +} diff --git a/client/fingerprint/test_fixtures/cni/net2.conflist b/client/fingerprint/test_fixtures/cni/net2.conflist new file mode 100644 index 000000000000..8d14cfcfb968 --- /dev/null +++ b/client/fingerprint/test_fixtures/cni/net2.conflist @@ -0,0 +1,26 @@ +{ + "cniVersion": "0.3.1", + "name": "net2", + "plugins": [ + { + "type": "ptp", + "ipMasq": true, + "ipam": { + "type": "host-local", + "subnet": "172.16.30.0/24", + "routes": [ + { + "dst": "0.0.0.0/0" + } + ] + } + }, + { + "type": "portmap", + "capabilities": { + "portMappings": true + }, + "externalSetMarkChain": "KUBE-MARK-MASQ" + } + ] +} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 94d609805ae3..cde3c860ee92 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2232,53 +2232,23 @@ type NetworkResource struct { DynamicPorts []Port // Host Dynamically assigned ports } -func (nr *NetworkResource) Equals(other *NetworkResource) bool { - if nr.Mode != other.Mode { - return false - } - - if nr.Device != other.Device { - return false - } - - if nr.CIDR != other.CIDR { - return false - } - - if nr.IP != other.IP { - return false - } - - if nr.MBits != other.MBits { - return false - } - - if len(nr.ReservedPorts) != len(other.ReservedPorts) { - return false - } +func (nr *NetworkResource) Hash() uint32 { + var data []byte + data = append(data, []byte(fmt.Sprintf("%s%s%s%s%d", nr.Mode, nr.Device, nr.CIDR, nr.IP, nr.MBits))...) for i, port := range nr.ReservedPorts { - if len(other.ReservedPorts) <= i { - return false - } - if port != other.ReservedPorts[i] { - return false - } + data = append(data, []byte(fmt.Sprintf("%d%s%d%d", i, port.Label, port.Value, port.To))...) } - if len(nr.DynamicPorts) != len(other.DynamicPorts) { - return false - } for i, port := range nr.DynamicPorts { - if len(other.DynamicPorts) <= i { - return false - } - if port != other.DynamicPorts[i] { - return false - } + data = append(data, []byte(fmt.Sprintf("%d%s%d%d", i, port.Label, port.Value, port.To))...) } - return true + return crc32.ChecksumIEEE(data) +} + +func (nr *NetworkResource) Equals(other *NetworkResource) bool { + return nr.Hash() == other.Hash() } func (n *NetworkResource) Canonicalize() { @@ -2579,6 +2549,7 @@ func (n *NodeResources) Merge(o *NodeResources) { n.Memory.Merge(&o.Memory) n.Disk.Merge(&o.Disk) + if len(o.Networks) != 0 { n.Networks = o.Networks } From c62600d5778156fd4d40ad216c5cf5761d25afa3 Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Thu, 26 Mar 2020 23:36:42 -0400 Subject: [PATCH 3/8] cni: add network feasibility checker /w host and bridge mode support --- client/fingerprint/bridge.go | 12 +++++++ client/fingerprint/bridge_default.go | 5 +++ client/fingerprint/bridge_linux.go | 46 +++++++++++++++++++++++++ client/fingerprint/bridge_linux_test.go | 14 ++++++++ client/fingerprint/fingerprint_linux.go | 1 + client/fingerprint/network.go | 1 + nomad/mock/mock.go | 1 + nomad/structs/structs.go | 4 +-- nomad/structs/structs_test.go | 44 +++++++++++++++++++++++ scheduler/feasible.go | 38 ++++++++++++++++++++ scheduler/feasible_test.go | 42 ++++++++++++++++++++++ scheduler/stack.go | 4 +++ scheduler/stack_oss.go | 6 +++- 13 files changed, 215 insertions(+), 3 deletions(-) create mode 100644 client/fingerprint/bridge.go create mode 100644 client/fingerprint/bridge_default.go create mode 100644 client/fingerprint/bridge_linux.go create mode 100644 client/fingerprint/bridge_linux_test.go diff --git a/client/fingerprint/bridge.go b/client/fingerprint/bridge.go new file mode 100644 index 000000000000..163e8cbea191 --- /dev/null +++ b/client/fingerprint/bridge.go @@ -0,0 +1,12 @@ +package fingerprint + +import log "github.com/hashicorp/go-hclog" + +type BridgeFingerprint struct { + logger log.Logger + StaticFingerprinter +} + +func NewBridgeFingerprint(logger log.Logger) Fingerprint { + return &BridgeFingerprint{logger: logger} +} diff --git a/client/fingerprint/bridge_default.go b/client/fingerprint/bridge_default.go new file mode 100644 index 000000000000..bf5445227301 --- /dev/null +++ b/client/fingerprint/bridge_default.go @@ -0,0 +1,5 @@ +// +build !linux + +package fingerprint + +func (f *BridgeFingerprint) Fingerprint(*FingerprintRequest, *FingerprintResponse) error { return nil } diff --git a/client/fingerprint/bridge_linux.go b/client/fingerprint/bridge_linux.go new file mode 100644 index 000000000000..336fda4ea329 --- /dev/null +++ b/client/fingerprint/bridge_linux.go @@ -0,0 +1,46 @@ +package fingerprint + +import ( + "bufio" + "fmt" + "os" + "strings" + + "github.com/hashicorp/nomad/nomad/structs" +) + +const bridgeKernelModuleName = "bridge" + +func (f *BridgeFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error { + if err := f.checkKMod(bridgeKernelModuleName); err != nil { + f.logger.Warn("failed to detect bridge kernel module, bridge network mode disabled", "error", err) + return nil + } + + resp.NodeResources = &structs.NodeResources{ + Networks: []*structs.NetworkResource{ + { + Mode: "bridge", + }, + }, + } + resp.Detected = true + return nil +} + +func (f *BridgeFingerprint) checkKMod(mod string) error { + file, err := os.Open("/proc/modules") + if err != nil { + return fmt.Errorf("could not read /proc/modules: %v", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + if strings.HasPrefix(scanner.Text(), mod+" ") { + return nil + } + } + + return fmt.Errorf("could not detect kernel module %s", mod) +} diff --git a/client/fingerprint/bridge_linux_test.go b/client/fingerprint/bridge_linux_test.go new file mode 100644 index 000000000000..775a5422730c --- /dev/null +++ b/client/fingerprint/bridge_linux_test.go @@ -0,0 +1,14 @@ +package fingerprint + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestBridgeFingerprint_checkKMod(t *testing.T) { + require := require.New(t) + f := &BridgeFingerprint{} + require.NoError(f.checkKMod("hid")) + require.Error(f.checkKMod("nonexistentmodule")) +} \ No newline at end of file diff --git a/client/fingerprint/fingerprint_linux.go b/client/fingerprint/fingerprint_linux.go index f52669a7f18b..10c9768b29f4 100644 --- a/client/fingerprint/fingerprint_linux.go +++ b/client/fingerprint/fingerprint_linux.go @@ -2,4 +2,5 @@ package fingerprint func initPlatformFingerprints(fps map[string]Factory) { fps["cgroup"] = NewCGroupFingerprint + fps["bridge"] = NewBridgeFingerprint } diff --git a/client/fingerprint/network.go b/client/fingerprint/network.go index 9cc4fcc658f7..73b8fa5098c4 100644 --- a/client/fingerprint/network.go +++ b/client/fingerprint/network.go @@ -132,6 +132,7 @@ func (f *NetworkFingerprint) createNetworkResources(throughput int, intf *net.In for _, addr := range addrs { // Create a new network resource newNetwork := &structs.NetworkResource{ + Mode: "host", Device: intf.Name, MBits: throughput, } diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 23738503cfdf..49c911bfdc03 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -65,6 +65,7 @@ func Node() *structs.Node { }, Networks: []*structs.NetworkResource{ { + Mode: "host", Device: "eth0", CIDR: "192.168.0.100/32", MBits: 1000, diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index cde3c860ee92..d1dc4afba247 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -12,6 +12,7 @@ import ( "encoding/hex" "errors" "fmt" + "hash/crc32" "math" "net" "os" @@ -2549,9 +2550,8 @@ func (n *NodeResources) Merge(o *NodeResources) { n.Memory.Merge(&o.Memory) n.Disk.Merge(&o.Disk) - if len(o.Networks) != 0 { - n.Networks = o.Networks + n.Networks = append(n.Networks, o.Networks...) } if len(o.Devices) != 0 { diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index a160a3ec9168..a6f8b1a2fb22 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -5238,3 +5238,47 @@ func TestNodeReservedNetworkResources_ParseReserved(t *testing.T) { require.Equal(out, tc.Parsed) } } + +func TestNodeResources_Merge(t *testing.T) { + res := &NodeResources{ + Cpu: NodeCpuResources{ + CpuShares: int64(32000), + }, + Memory: NodeMemoryResources{ + MemoryMB: int64(64000), + }, + Networks: Networks{ + { + Device: "foo", + }, + }, + } + + res.Merge(&NodeResources{ + Memory: NodeMemoryResources{ + MemoryMB: int64(100000), + }, + Networks: Networks{ + { + Mode: "foo/bar", + }, + }, + }) + + require.Exactly(t, &NodeResources{ + Cpu: NodeCpuResources{ + CpuShares: int64(32000), + }, + Memory: NodeMemoryResources{ + MemoryMB: int64(100000), + }, + Networks: Networks{ + { + Device: "foo", + }, + { + Mode: "foo/bar", + }, + }, + }, res) +} diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 848764443fc1..edcaae1255dd 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -314,6 +314,44 @@ func (c *CSIVolumeChecker) hasPlugins(n *structs.Node) (bool, string) { return true, "" } +// NetworkChecker is a FeasibilityChecker which returns whether a node has the +// network resources necessary to schedule the task group +type NetworkChecker struct { + ctx Context + networkMode string +} + +func NewNetworkChecker(ctx Context) *NetworkChecker { + return &NetworkChecker{ctx: ctx, networkMode: "host"} +} + +func (c *NetworkChecker) SetNetworkMode(netMode string) { + c.networkMode = netMode +} + +func (c *NetworkChecker) Feasible(option *structs.Node) bool { + if c.hasNetwork(option) { + return true + } + + c.ctx.Metrics().FilterNode(option, "missing network") + return false +} + +func (c *NetworkChecker) hasNetwork(option *structs.Node) bool { + if option.NodeResources == nil { + return false + } + + for _, nw := range option.NodeResources.Networks { + if nw.Mode == c.networkMode { + return true + } + } + + return false +} + // DriverChecker is a FeasibilityChecker which returns whether a node has the // drivers necessary to scheduler a task group. type DriverChecker struct { diff --git a/scheduler/feasible_test.go b/scheduler/feasible_test.go index 18e50c6ee4e7..725a90bc121f 100644 --- a/scheduler/feasible_test.go +++ b/scheduler/feasible_test.go @@ -397,6 +397,48 @@ func TestCSIVolumeChecker(t *testing.T) { } } +func TestNetworkChecker(t *testing.T) { + _, ctx := testContext(t) + nodes := []*structs.Node{ + mock.Node(), + mock.Node(), + mock.Node(), + } + nodes[0].NodeResources.Networks = append(nodes[0].NodeResources.Networks, &structs.NetworkResource{Mode: "bridge"}) + nodes[1].NodeResources.Networks = append(nodes[1].NodeResources.Networks, &structs.NetworkResource{Mode: "bridge"}) + nodes[2].NodeResources.Networks = append(nodes[2].NodeResources.Networks, &structs.NetworkResource{Mode: "cni/mynet"}) + + checker := NewNetworkChecker(ctx) + cases := []struct { + mode string + results []bool + }{ + { + mode: "host", + results: []bool{true, true, true}, + }, + { + mode: "bridge", + results: []bool{true, true, false}, + }, + { + mode: "cni/mynet", + results: []bool{false, false, true}, + }, + { + mode: "cni/nonexistent", + results: []bool{false, false, false}, + }, + } + + for _, c := range cases { + checker.SetNetworkMode(c.mode) + for i, node := range nodes { + require.Equal(t, c.results[i], checker.Feasible(node), "mode=%q, idx=%d", c.mode, i) + } + } +} + func TestDriverChecker_DriverInfo(t *testing.T) { _, ctx := testContext(t) nodes := []*structs.Node{ diff --git a/scheduler/stack.go b/scheduler/stack.go index b9ceb19bb31e..063596a598d0 100644 --- a/scheduler/stack.go +++ b/scheduler/stack.go @@ -52,6 +52,7 @@ type GenericStack struct { taskGroupDevices *DeviceChecker taskGroupHostVolumes *HostVolumeChecker taskGroupCSIVolumes *CSIVolumeChecker + taskGroupNetwork *NetworkChecker distinctHostsConstraint *DistinctHostsIterator distinctPropertyConstraint *DistinctPropertyIterator @@ -135,6 +136,9 @@ func (s *GenericStack) Select(tg *structs.TaskGroup, options *SelectOptions) *Ra s.taskGroupDevices.SetTaskGroup(tg) s.taskGroupHostVolumes.SetVolumes(tg.Volumes) s.taskGroupCSIVolumes.SetVolumes(tg.Volumes) + if len(tg.Networks) > 0 { + s.taskGroupNetwork.SetNetworkMode(tg.Networks[0].Mode) + } s.distinctHostsConstraint.SetTaskGroup(tg) s.distinctPropertyConstraint.SetTaskGroup(tg) s.wrappedChecks.SetTaskGroup(tg.Name) diff --git a/scheduler/stack_oss.go b/scheduler/stack_oss.go index a705b8c7395b..d9fe07bca0e7 100644 --- a/scheduler/stack_oss.go +++ b/scheduler/stack_oss.go @@ -37,6 +37,9 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack { // Filter on available, healthy CSI plugins s.taskGroupCSIVolumes = NewCSIVolumeChecker(ctx) + // Filter on available client networks + s.taskGroupNetwork = NewNetworkChecker(ctx) + // Create the feasibility wrapper which wraps all feasibility checks in // which feasibility checking can be skipped if the computed node class has // previously been marked as eligible or ineligible. Generally this will be @@ -45,7 +48,8 @@ func NewGenericStack(batch bool, ctx Context) *GenericStack { tgs := []FeasibilityChecker{s.taskGroupDrivers, s.taskGroupConstraint, s.taskGroupHostVolumes, - s.taskGroupDevices} + s.taskGroupDevices, + s.taskGroupNetwork} avail := []FeasibilityChecker{s.taskGroupCSIVolumes} s.wrappedChecks = NewFeasibilityWrapper(ctx, s.quota, jobs, tgs, avail) From 4d69ffd5f5ed7e8d300b9ca85e91f2364aa99cc7 Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Thu, 26 Mar 2020 23:41:44 -0400 Subject: [PATCH 4/8] cni: client cni network setup and refactored bridge networking to reuse cni --- client/allocrunner/network_manager_linux.go | 11 +- client/allocrunner/networking_bridge_linux.go | 99 ++-------- client/allocrunner/networking_cni.go | 177 ++++++++++++++++++ client/config/config.go | 5 + client/fingerprint/bridge_linux_test.go | 4 +- client/fingerprint/fingerprint.go | 2 +- client/fingerprint_manager.go | 2 +- 7 files changed, 206 insertions(+), 94 deletions(-) create mode 100644 client/allocrunner/networking_cni.go diff --git a/client/allocrunner/network_manager_linux.go b/client/allocrunner/network_manager_linux.go index 87da239c4337..b6702a7fe399 100644 --- a/client/allocrunner/network_manager_linux.go +++ b/client/allocrunner/network_manager_linux.go @@ -117,6 +117,9 @@ func netModeToIsolationMode(netMode string) drivers.NetIsolationMode { case "driver": return drivers.NetIsolationModeTask default: + if strings.HasPrefix(strings.ToLower(netMode), "cni/") { + return drivers.NetIsolationModeGroup + } return drivers.NetIsolationModeHost } } @@ -129,9 +132,13 @@ func newNetworkConfigurator(log hclog.Logger, alloc *structs.Allocation, config return &hostNetworkConfigurator{}, nil } - switch strings.ToLower(tg.Networks[0].Mode) { - case "bridge": + netMode := strings.ToLower(tg.Networks[0].Mode) + + switch { + case netMode == "bridge": return newBridgeNetworkConfigurator(log, config.BridgeNetworkName, config.BridgeNetworkAllocSubnet, config.CNIPath) + case strings.HasPrefix(netMode, "cni/"): + return newCNINetworkConfigurator(log, config.CNIPath, config.CNIInterfacePrefix, config.CNIConfigDir, netMode[4:]) default: return &hostNetworkConfigurator{}, nil } diff --git a/client/allocrunner/networking_bridge_linux.go b/client/allocrunner/networking_bridge_linux.go index af07ad1d990c..2bd429b8a61a 100644 --- a/client/allocrunner/networking_bridge_linux.go +++ b/client/allocrunner/networking_bridge_linux.go @@ -3,12 +3,7 @@ package allocrunner import ( "context" "fmt" - "math/rand" - "os" - "path/filepath" - "time" - cni "github.com/containerd/go-cni" "github.com/coreos/go-iptables/iptables" hclog "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/nomad/structs" @@ -16,14 +11,6 @@ import ( ) const ( - // envCNIPath is the environment variable name to use to derive the CNI path - // when it is not explicitly set by the client - envCNIPath = "CNI_PATH" - - // defaultCNIPath is the CNI path to use when it is not set by the client - // and is not set by environment variable - defaultCNIPath = "/opt/cni/bin" - // defaultNomadBridgeName is the name of the bridge to use when not set by // the client defaultNomadBridgeName = "nomad" @@ -45,11 +32,10 @@ const ( // shared bridge, configures masquerading for egress traffic and port mapping // for ingress type bridgeNetworkConfigurator struct { - cni cni.CNI + cni *cniNetworkConfigurator allocSubnet string bridgeName string - rand *rand.Rand logger hclog.Logger } @@ -57,21 +43,8 @@ func newBridgeNetworkConfigurator(log hclog.Logger, bridgeName, ipRange, cniPath b := &bridgeNetworkConfigurator{ bridgeName: bridgeName, allocSubnet: ipRange, - rand: rand.New(rand.NewSource(time.Now().Unix())), logger: log, } - if cniPath == "" { - if cniPath = os.Getenv(envCNIPath); cniPath == "" { - cniPath = defaultCNIPath - } - } - - c, err := cni.New(cni.WithPluginDir(filepath.SplitList(cniPath)), - cni.WithInterfacePrefix(bridgeNetworkAllocIfPrefix)) - if err != nil { - return nil, err - } - b.cni = c if b.bridgeName == "" { b.bridgeName = defaultNomadBridgeName @@ -81,6 +54,12 @@ func newBridgeNetworkConfigurator(log hclog.Logger, bridgeName, ipRange, cniPath b.allocSubnet = defaultNomadAllocSubnet } + c, err := newCNINetworkConfiguratorWithConf(log, cniPath, bridgeNetworkAllocIfPrefix, buildNomadBridgeNetConfig(b.bridgeName, b.allocSubnet)) + if err != nil { + return nil, err + } + b.cni = c + return b, nil } @@ -148,72 +127,16 @@ func (b *bridgeNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Al return fmt.Errorf("failed to initialize table forwarding rules: %v", err) } - if err := b.ensureCNIInitialized(); err != nil { - return err - } - - // Depending on the version of bridge cni plugin (< 0.8.4) a known race could occur - // where two alloc attempt to create the nomad bridge at the same time, resulting - // in one of them to fail. This retry attempts to overcome those erroneous failures. - const retry = 3 - for attempt := 1; ; attempt++ { - //TODO eventually returning the IP from the result would be nice to have in the alloc - if _, err := b.cni.Setup(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc))); err != nil { - b.logger.Warn("failed to configure bridge network", "err", err, "attempt", attempt) - if attempt == retry { - return fmt.Errorf("failed to configure bridge network: %v", err) - } - // Sleep for 1 second + jitter - time.Sleep(time.Second + (time.Duration(b.rand.Int63n(1000)) * time.Millisecond)) - continue - } - break - } - - return nil - + return b.cni.Setup(ctx, alloc, spec) } // Teardown calls the CNI plugins with the delete action func (b *bridgeNetworkConfigurator) Teardown(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) error { - if err := b.ensureCNIInitialized(); err != nil { - return err - } - - return b.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc))) -} - -func (b *bridgeNetworkConfigurator) ensureCNIInitialized() error { - if err := b.cni.Status(); cni.IsCNINotInitialized(err) { - return b.cni.Load(cni.WithConfListBytes(b.buildNomadNetConfig())) - } else { - return err - } -} - -// getPortMapping builds a list of portMapping structs that are used as the -// portmapping capability arguments for the portmap CNI plugin -func getPortMapping(alloc *structs.Allocation) []cni.PortMapping { - ports := []cni.PortMapping{} - for _, network := range alloc.AllocatedResources.Shared.Networks { - for _, port := range append(network.DynamicPorts, network.ReservedPorts...) { - if port.To < 1 { - continue - } - for _, proto := range []string{"tcp", "udp"} { - ports = append(ports, cni.PortMapping{ - HostPort: int32(port.Value), - ContainerPort: int32(port.To), - Protocol: proto, - }) - } - } - } - return ports + return b.cni.Teardown(ctx, alloc, spec) } -func (b *bridgeNetworkConfigurator) buildNomadNetConfig() []byte { - return []byte(fmt.Sprintf(nomadCNIConfigTemplate, b.bridgeName, b.allocSubnet, cniAdminChainName)) +func buildNomadBridgeNetConfig(bridgeName, subnet string) []byte { + return []byte(fmt.Sprintf(nomadCNIConfigTemplate, bridgeName, subnet, cniAdminChainName)) } const nomadCNIConfigTemplate = `{ diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go new file mode 100644 index 000000000000..05dfe24a4c0c --- /dev/null +++ b/client/allocrunner/networking_cni.go @@ -0,0 +1,177 @@ +package allocrunner + +import ( + "context" + "fmt" + "math/rand" + "os" + "path/filepath" + "sort" + "strings" + "time" + + cni "github.com/containerd/go-cni" + cnilibrary "github.com/containernetworking/cni/libcni" + log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/hashicorp/nomad/plugins/drivers" +) + +const ( + + // envCNIPath is the environment variable name to use to derive the CNI path + // when it is not explicitly set by the client + envCNIPath = "CNI_PATH" + + // defaultCNIPath is the CNI path to use when it is not set by the client + // and is not set by environment variable + defaultCNIPath = "/opt/cni/bin" + + // defaultCNIInterfacePrefix is the network interface to use if not set in + // client config + defaultCNIInterfacePrefix = "eth" +) + +type cniNetworkConfigurator struct { + cni cni.CNI + cniConf []byte + + rand *rand.Rand + logger log.Logger +} + +func newCNINetworkConfigurator(logger log.Logger, cniPath, cniInterfacePrefix, cniConfDir, networkName string) (*cniNetworkConfigurator, error) { + cniConf, err := loadCNIConf(cniConfDir, networkName) + if err != nil { + return nil, fmt.Errorf("failed to load cni config: %v", err) + } + + return newCNINetworkConfiguratorWithConf(logger, cniPath, cniInterfacePrefix, cniConf) +} + +func newCNINetworkConfiguratorWithConf(logger log.Logger, cniPath, cniInterfacePrefix string, cniConf []byte) (*cniNetworkConfigurator, error) { + conf := &cniNetworkConfigurator{ + cniConf: cniConf, + rand: rand.New(rand.NewSource(time.Now().Unix())), + logger: logger, + } + if cniPath == "" { + if cniPath = os.Getenv(envCNIPath); cniPath == "" { + cniPath = defaultCNIPath + } + } + + if cniInterfacePrefix == "" { + cniInterfacePrefix = defaultCNIInterfacePrefix + } + + c, err := cni.New(cni.WithPluginDir(filepath.SplitList(cniPath)), + cni.WithInterfacePrefix(cniInterfacePrefix)) + if err != nil { + return nil, err + } + conf.cni = c + + return conf, nil +} + +// Setup calls the CNI plugins with the add action +func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) error { + if err := c.ensureCNIInitialized(); err != nil { + return err + } + + // Depending on the version of bridge cni plugin used, a known race could occure + // where two alloc attempt to create the nomad bridge at the same time, resulting + // in one of them to fail. This rety attempts to overcome any + const retry = 3 + for attempt := 1; ; attempt++ { + //TODO eventually returning the IP from the result would be nice to have in the alloc + if _, err := c.cni.Setup(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc))); err != nil { + c.logger.Warn("failed to configure network", "err", err, "attempt", attempt) + if attempt == retry { + return fmt.Errorf("failed to configure network: %v", err) + } + // Sleep for 1 second + jitter + time.Sleep(time.Second + (time.Duration(c.rand.Int63n(1000)) * time.Millisecond)) + continue + } + break + } + + return nil + +} + +func loadCNIConf(confDir, name string) ([]byte, error) { + files, err := cnilibrary.ConfFiles(confDir, []string{".conf", ".conflist", ".json"}) + switch { + case err != nil: + return nil, fmt.Errorf("failed to read cni config file: %v", err) + case len(files) == 0: + return nil, fmt.Errorf("no cni network config found in %s", confDir) + } + + // files contains the network config files associated with cni network. + // Use lexicographical way as a defined order for network config files. + sort.Strings(files) + for _, confFile := range files { + if strings.HasSuffix(confFile, ".conflist") { + confList, err := cnilibrary.ConfListFromFile(confFile) + if err != nil { + return nil, fmt.Errorf("failed to load CNI config list file %s: %v", confFile, err) + } + if confList.Name == name { + return confList.Bytes, nil + } + } else { + conf, err := cnilibrary.ConfFromFile(confFile) + if err != nil { + return nil, fmt.Errorf("failed to load CNI config file %s: %v", confFile, err) + } + if conf.Network.Name == name { + return conf.Bytes, nil + } + } + } + + return nil, fmt.Errorf("CNI network config not found for name %q", name) +} + +// Teardown calls the CNI plugins with the delete action +func (c *cniNetworkConfigurator) Teardown(ctx context.Context, alloc *structs.Allocation, spec *drivers.NetworkIsolationSpec) error { + if err := c.ensureCNIInitialized(); err != nil { + return err + } + + return c.cni.Remove(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc))) +} + +func (c *cniNetworkConfigurator) ensureCNIInitialized() error { + if err := c.cni.Status(); cni.IsCNINotInitialized(err) { + return c.cni.Load(cni.WithConfListBytes(c.cniConf)) + } else { + return err + } +} + +// getPortMapping builds a list of portMapping structs that are used as the +// portmapping capability arguments for the portmap CNI plugin +func getPortMapping(alloc *structs.Allocation) []cni.PortMapping { + ports := []cni.PortMapping{} + for _, network := range alloc.AllocatedResources.Shared.Networks { + for _, port := range append(network.DynamicPorts, network.ReservedPorts...) { + if port.To < 1 { + continue + } + for _, proto := range []string{"tcp", "udp"} { + ports = append(ports, cni.PortMapping{ + HostPort: int32(port.Value), + ContainerPort: int32(port.To), + Protocol: proto, + }) + } + } + } + return ports +} diff --git a/client/config/config.go b/client/config/config.go index 0dfca7878e59..df6b3a87618c 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -238,6 +238,11 @@ type Config struct { // client will use this path when fingerprinting CNI networks. CNIConfigDir string + // CNIInterfacePrefix is the prefix to use when creating CNI network interfaces. This + // defaults to 'eth', therefore the first interface created by CNI inside the alloc + // network will be 'eth0'. + CNIInterfacePrefix string + // BridgeNetworkName is the name to use for the bridge created in bridge // networking mode. This defaults to 'nomad' if not set BridgeNetworkName string diff --git a/client/fingerprint/bridge_linux_test.go b/client/fingerprint/bridge_linux_test.go index 775a5422730c..4e55c92dcee4 100644 --- a/client/fingerprint/bridge_linux_test.go +++ b/client/fingerprint/bridge_linux_test.go @@ -9,6 +9,6 @@ import ( func TestBridgeFingerprint_checkKMod(t *testing.T) { require := require.New(t) f := &BridgeFingerprint{} - require.NoError(f.checkKMod("hid")) + require.NoError(f.checkKMod("ip_tables")) require.Error(f.checkKMod("nonexistentmodule")) -} \ No newline at end of file +} diff --git a/client/fingerprint/fingerprint.go b/client/fingerprint/fingerprint.go index c41fcbf259f0..d896b48e2240 100644 --- a/client/fingerprint/fingerprint.go +++ b/client/fingerprint/fingerprint.go @@ -117,7 +117,7 @@ type Fingerprint interface { } // ReloadableFingerprint can be implemented if the fingerprinter needs to be run during client reload. -// If implemented, the client will call Reload during client reload then immediatly Fingerprint +// If implemented, the client will call Reload during client reload then immediately Fingerprint type ReloadableFingerprint interface { Fingerprint Reload() diff --git a/client/fingerprint_manager.go b/client/fingerprint_manager.go index eff1a01f9d1c..e8993526ffaf 100644 --- a/client/fingerprint_manager.go +++ b/client/fingerprint_manager.go @@ -106,7 +106,7 @@ func (fp *FingerprintManager) Run() error { return nil } -// Reload will reload any registered ReloadableFingerprinters and immediatly call Fingerprint +// Reload will reload any registered ReloadableFingerprinters and immediately call Fingerprint func (fm *FingerprintManager) Reload() { for name, fp := range fm.reloadableFps { fm.logger.Info("reloading fingerprinter", "fingerprinter", name) From 17b2a4a13c2b33b5c1af1edb88eca42f42135d12 Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Fri, 27 Mar 2020 00:04:20 -0400 Subject: [PATCH 5/8] cni: set defaults for cni client config --- client/config/config.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/client/config/config.go b/client/config/config.go index df6b3a87618c..a6df2fac2d18 100644 --- a/client/config/config.go +++ b/client/config/config.go @@ -310,7 +310,9 @@ func DefaultConfig() *Config { }, BackwardsCompatibleMetrics: false, RPCHoldTimeout: 5 * time.Second, + CNIPath: "/opt/cni/bin", CNIConfigDir: "/opt/cni/config", + CNIInterfacePrefix: "eth", } } From 02454d688e78adeacfd6c22799dc8f957e286191 Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Fri, 27 Mar 2020 01:59:43 -0400 Subject: [PATCH 6/8] cni: fix networkinterface config override /w multiple network resources --- client/client.go | 37 ++++++++++++++------------- client/client_test.go | 48 +++++++++++++++-------------------- client/fingerprint/env_aws.go | 1 + 3 files changed, 42 insertions(+), 44 deletions(-) diff --git a/client/client.go b/client/client.go index e2a6145723ec..a45f4f4f197a 100644 --- a/client/client.go +++ b/client/client.go @@ -1432,7 +1432,6 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp // if we still have node changes, merge them if response.Resources != nil { response.Resources.Networks = updateNetworks( - c.config.Node.Resources.Networks, response.Resources.Networks, c.config) if !c.config.Node.Resources.Equals(response.Resources) { @@ -1445,7 +1444,6 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp // if we still have node changes, merge them if response.NodeResources != nil { response.NodeResources.Networks = updateNetworks( - c.config.Node.NodeResources.Networks, response.NodeResources.Networks, c.config) if !c.config.Node.NodeResources.Equals(response.NodeResources) { @@ -1461,33 +1459,38 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp return c.configCopy.Node } -// updateNetworks preserves manually configured network options, but -// applies fingerprint updates -func updateNetworks(ns structs.Networks, up structs.Networks, c *config.Config) structs.Networks { - if c.NetworkInterface == "" { - ns = up - } else { - // If a network device is configured, filter up to contain details for only +func updateNetworks(up structs.Networks, c *config.Config) structs.Networks { + if up == nil { + return nil + } + + if c.NetworkInterface != "" { + // For host networks, if a network device is configured filter up to contain details for only // that device upd := []*structs.NetworkResource{} for _, n := range up { - if c.NetworkInterface == n.Device { + switch n.Mode { + case "host": + if c.NetworkInterface == n.Device { + upd = append(upd, n) + } + default: upd = append(upd, n) + } } - // If updates, use them. Otherwise, ns contains the configured interfaces - if len(upd) > 0 { - ns = upd - } + up = upd } // ns is set, apply the config NetworkSpeed to all if c.NetworkSpeed != 0 { - for _, n := range ns { - n.MBits = c.NetworkSpeed + for _, n := range up { + if n.Mode == "host" { + n.MBits = c.NetworkSpeed + } } } - return ns + return up } // retryIntv calculates a retry interval value given the base diff --git a/client/client_test.go b/client/client_test.go index 95bf51069ade..a9793ebaf04e 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -1184,17 +1184,16 @@ func TestClient_UpdateNodeFromFingerprintKeepsConfig(t *testing.T) { client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{ NodeResources: &structs.NodeResources{ Cpu: structs.NodeCpuResources{CpuShares: 123}, - Networks: []*structs.NetworkResource{{Device: "any-interface"}}, + Networks: []*structs.NetworkResource{{Mode: "host", Device: "any-interface"}}, }, Resources: &structs.Resources{ - CPU: 80, - Networks: []*structs.NetworkResource{{Device: "any-interface"}}, + CPU: 80, }, }) - assert.Equal(t, int64(123), client.config.Node.NodeResources.Cpu.CpuShares) - assert.Equal(t, "any-interface", client.config.Node.NodeResources.Networks[0].Device) - assert.Equal(t, 80, client.config.Node.Resources.CPU) - assert.Equal(t, "any-interface", client.config.Node.Resources.Networks[0].Device) + idx := len(client.config.Node.NodeResources.Networks) - 1 + require.Equal(t, int64(123), client.config.Node.NodeResources.Cpu.CpuShares) + require.Equal(t, "any-interface", client.config.Node.NodeResources.Networks[idx].Device) + require.Equal(t, 80, client.config.Node.Resources.CPU) // lookup an interface. client.Node starts with a hardcoded value, eth0, // and is only updated async through fingerprinter. @@ -1210,48 +1209,43 @@ func TestClient_UpdateNodeFromFingerprintKeepsConfig(t *testing.T) { client, cleanup = TestClient(t, func(c *config.Config) { c.NetworkInterface = dev c.Node.Name = name + c.Options["fingerprint.blacklist"] = "network" // Node is already a mock.Node, with a device c.Node.NodeResources.Networks[0].Device = dev - c.Node.Resources.Networks = c.Node.NodeResources.Networks }) defer cleanup() client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{ NodeResources: &structs.NodeResources{ Cpu: structs.NodeCpuResources{CpuShares: 123}, Networks: []*structs.NetworkResource{ - {Device: "any-interface", MBits: 20}, - {Device: dev, MBits: 20}, + {Mode: "host", Device: "any-interface", MBits: 20}, }, }, - Resources: &structs.Resources{ - CPU: 80, - Networks: []*structs.NetworkResource{{Device: "any-interface"}}, - }, }) - assert.Equal(t, int64(123), client.config.Node.NodeResources.Cpu.CpuShares) + require.Equal(t, int64(123), client.config.Node.NodeResources.Cpu.CpuShares) // only the configured device is kept - assert.Equal(t, 1, len(client.config.Node.NodeResources.Networks)) - assert.Equal(t, dev, client.config.Node.NodeResources.Networks[0].Device) - // network speed updates to the configured network are kept - assert.Equal(t, 20, client.config.Node.NodeResources.Networks[0].MBits) - assert.Equal(t, 80, client.config.Node.Resources.CPU) - assert.Equal(t, dev, client.config.Node.Resources.Networks[0].Device) + require.Equal(t, 2, len(client.config.Node.NodeResources.Networks)) + require.Equal(t, dev, client.config.Node.NodeResources.Networks[0].Device) + require.Equal(t, "bridge", client.config.Node.NodeResources.Networks[1].Mode) // Network speed is applied to all NetworkResources client.config.NetworkInterface = "" client.config.NetworkSpeed = 100 client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{ NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{CpuShares: 123}, - Networks: []*structs.NetworkResource{{Device: "any-interface", MBits: 20}}, + Cpu: structs.NodeCpuResources{CpuShares: 123}, + Networks: []*structs.NetworkResource{ + {Mode: "host", Device: "any-interface", MBits: 20}, + }, }, Resources: &structs.Resources{ - CPU: 80, - Networks: []*structs.NetworkResource{{Device: "any-interface"}}, + CPU: 80, }, }) - assert.Equal(t, "any-interface", client.config.Node.NodeResources.Networks[0].Device) - assert.Equal(t, 100, client.config.Node.NodeResources.Networks[0].MBits) + assert.Equal(t, 3, len(client.config.Node.NodeResources.Networks)) + assert.Equal(t, "any-interface", client.config.Node.NodeResources.Networks[2].Device) + assert.Equal(t, 100, client.config.Node.NodeResources.Networks[2].MBits) + assert.Equal(t, 0, client.config.Node.NodeResources.Networks[1].MBits) } // Support multiple IP addresses (ipv4 vs. 6, e.g.) on the configured network interface diff --git a/client/fingerprint/env_aws.go b/client/fingerprint/env_aws.go index 0b2466fd3bb3..87becff9e9aa 100644 --- a/client/fingerprint/env_aws.go +++ b/client/fingerprint/env_aws.go @@ -485,6 +485,7 @@ func (f *EnvAWSFingerprint) Fingerprint(request *FingerprintRequest, response *F nodeResources = new(structs.NodeResources) nodeResources.Networks = []*structs.NetworkResource{ { + Mode: "host", Device: "eth0", IP: val, CIDR: val + "/32", From 39e247aa9e213b577da4ea6ba10d6e617c08f4e9 Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Tue, 31 Mar 2020 14:26:30 -0400 Subject: [PATCH 7/8] cni: return first error if setup retry fails Signed-off-by: Nick Ethier --- client/allocrunner/networking_cni.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go index 05dfe24a4c0c..27f682c804a8 100644 --- a/client/allocrunner/networking_cni.go +++ b/client/allocrunner/networking_cni.go @@ -85,13 +85,18 @@ func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Alloc // where two alloc attempt to create the nomad bridge at the same time, resulting // in one of them to fail. This rety attempts to overcome any const retry = 3 + var firstError error for attempt := 1; ; attempt++ { //TODO eventually returning the IP from the result would be nice to have in the alloc if _, err := c.cni.Setup(ctx, alloc.ID, spec.Path, cni.WithCapabilityPortMap(getPortMapping(alloc))); err != nil { c.logger.Warn("failed to configure network", "err", err, "attempt", attempt) - if attempt == retry { - return fmt.Errorf("failed to configure network: %v", err) + switch attempt { + case 1: + firstError = err + case retry: + return fmt.Errorf("failed to configure network: %v", firstError) } + // Sleep for 1 second + jitter time.Sleep(time.Second + (time.Duration(c.rand.Int63n(1000)) * time.Millisecond)) continue From 8dc915900e6e9c61fadc54df99dedec9ae72556f Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Tue, 31 Mar 2020 15:06:22 -0400 Subject: [PATCH 8/8] cni: fix assertion ordering in tests with multiple node networks ignore comparing networking for now in test Apply suggestions from code review Co-authored-by: Seth Hoenig --- client/allocrunner/networking_cni.go | 8 ++++---- client/client.go | 4 +++- client/client_test.go | 10 +++++++--- client/fingerprint/bridge_linux.go | 7 +++++-- client/fingerprint/cni.go | 8 +++++--- client/fingerprint/cni_test.go | 16 +++++++++++----- .../fingerprint/test_fixtures/cni/net2.conflist | 3 +-- nomad/structs/structs.go | 4 ++-- 8 files changed, 38 insertions(+), 22 deletions(-) diff --git a/client/allocrunner/networking_cni.go b/client/allocrunner/networking_cni.go index 27f682c804a8..15cd2745fd49 100644 --- a/client/allocrunner/networking_cni.go +++ b/client/allocrunner/networking_cni.go @@ -43,7 +43,7 @@ type cniNetworkConfigurator struct { func newCNINetworkConfigurator(logger log.Logger, cniPath, cniInterfacePrefix, cniConfDir, networkName string) (*cniNetworkConfigurator, error) { cniConf, err := loadCNIConf(cniConfDir, networkName) if err != nil { - return nil, fmt.Errorf("failed to load cni config: %v", err) + return nil, fmt.Errorf("failed to load CNI config: %v", err) } return newCNINetworkConfiguratorWithConf(logger, cniPath, cniInterfacePrefix, cniConf) @@ -83,7 +83,7 @@ func (c *cniNetworkConfigurator) Setup(ctx context.Context, alloc *structs.Alloc // Depending on the version of bridge cni plugin used, a known race could occure // where two alloc attempt to create the nomad bridge at the same time, resulting - // in one of them to fail. This rety attempts to overcome any + // in one of them to fail. This rety attempts to overcome those erroneous failures. const retry = 3 var firstError error for attempt := 1; ; attempt++ { @@ -112,9 +112,9 @@ func loadCNIConf(confDir, name string) ([]byte, error) { files, err := cnilibrary.ConfFiles(confDir, []string{".conf", ".conflist", ".json"}) switch { case err != nil: - return nil, fmt.Errorf("failed to read cni config file: %v", err) + return nil, fmt.Errorf("failed to detect CNI config file: %v", err) case len(files) == 0: - return nil, fmt.Errorf("no cni network config found in %s", confDir) + return nil, fmt.Errorf("no CNI network config found in %s", confDir) } // files contains the network config files associated with cni network. diff --git a/client/client.go b/client/client.go index a45f4f4f197a..b96169c83432 100644 --- a/client/client.go +++ b/client/client.go @@ -1459,6 +1459,8 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp return c.configCopy.Node } +// updateNetworks filters and overrides network speed of host networks based +// on configured settings func updateNetworks(up structs.Networks, c *config.Config) structs.Networks { if up == nil { return nil @@ -1482,7 +1484,7 @@ func updateNetworks(up structs.Networks, c *config.Config) structs.Networks { up = upd } - // ns is set, apply the config NetworkSpeed to all + // if set, apply the config NetworkSpeed to networks in host mode if c.NetworkSpeed != 0 { for _, n := range up { if n.Mode == "host" { diff --git a/client/client_test.go b/client/client_test.go index a9793ebaf04e..1afb34422fc6 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -1263,7 +1263,7 @@ func Test_UpdateNodeFromFingerprintMultiIP(t *testing.T) { // Client without network configured updates to match fingerprint client, cleanup := TestClient(t, func(c *config.Config) { c.NetworkInterface = dev - c.Node.NodeResources.Networks[0].Device = dev + c.Options["fingerprint.blacklist"] = "network,cni,bridge" c.Node.Resources.Networks = c.Node.NodeResources.Networks }) defer cleanup() @@ -1278,12 +1278,13 @@ func Test_UpdateNodeFromFingerprintMultiIP(t *testing.T) { }, }) - two := structs.Networks{ + nets := structs.Networks{ + mock.Node().NodeResources.Networks[0], {Device: dev, IP: "127.0.0.1"}, {Device: dev, IP: "::1"}, } - require.Equal(t, two, client.config.Node.NodeResources.Networks) + require.Equal(t, nets, client.config.Node.NodeResources.Networks) } func TestClient_computeAllocatedDeviceStats(t *testing.T) { @@ -1474,6 +1475,9 @@ func TestClient_getAllocatedResources(t *testing.T) { result := client.getAllocatedResources(client.config.Node) + // Ignore comparing networks for now + result.Flattened.Networks = nil + expected := structs.ComparableResources{ Flattened: structs.AllocatedTaskResources{ Cpu: structs.AllocatedCpuResources{ diff --git a/client/fingerprint/bridge_linux.go b/client/fingerprint/bridge_linux.go index 336fda4ea329..231d669ecfe9 100644 --- a/client/fingerprint/bridge_linux.go +++ b/client/fingerprint/bridge_linux.go @@ -4,7 +4,7 @@ import ( "bufio" "fmt" "os" - "strings" + "regexp" "github.com/hashicorp/nomad/nomad/structs" ) @@ -36,9 +36,12 @@ func (f *BridgeFingerprint) checkKMod(mod string) error { defer file.Close() scanner := bufio.NewScanner(file) + pattern := fmt.Sprintf("%s\\s+.*$", mod) for scanner.Scan() { - if strings.HasPrefix(scanner.Text(), mod+" ") { + if matched, err := regexp.MatchString(pattern, scanner.Text()); matched { return nil + } else if err != nil { + return fmt.Errorf("could not parse /proc/modules: %v", err) } } diff --git a/client/fingerprint/cni.go b/client/fingerprint/cni.go index 682c029d4bda..9080733db592 100644 --- a/client/fingerprint/cni.go +++ b/client/fingerprint/cni.go @@ -30,7 +30,7 @@ func (f *CNIFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintR files, err := libcni.ConfFiles(confDir, []string{".conf", ".conflist", ".json"}) if err != nil { - return fmt.Errorf("failed to read cni conf files: %v", err) + return fmt.Errorf("failed to detect CNI conf files: %v", err) } for _, confFile := range files { @@ -40,7 +40,7 @@ func (f *CNIFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintR return fmt.Errorf("failed to load CNI config list file %s: %v", confFile, err) } if _, ok := networks[confList.Name]; ok { - f.logger.Warn("multiple CNI config names found, ignoring file", "name", confList.Name, "file", confFile) + f.logger.Warn("duplicate CNI config names found, ignoring file", "name", confList.Name, "file", confFile) continue } networks[confList.Name] = struct{}{} @@ -50,7 +50,7 @@ func (f *CNIFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintR return fmt.Errorf("failed to load CNI config file %s: %v", confFile, err) } if _, ok := networks[conf.Network.Name]; ok { - f.logger.Warn("multiple CNI config names found, ignoring file", "name", conf.Network.Name, "file", confFile) + f.logger.Warn("duplicate CNI config names found, ignoring file", "name", conf.Network.Name, "file", confFile) continue } networks[conf.Network.Name] = struct{}{} @@ -73,3 +73,5 @@ func (f *CNIFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintR resp.Detected = true return nil } + +func (f *CNIFingerprint) Reload() {} diff --git a/client/fingerprint/cni_test.go b/client/fingerprint/cni_test.go index d89a930b27a2..3fd125b7d4f6 100644 --- a/client/fingerprint/cni_test.go +++ b/client/fingerprint/cni_test.go @@ -9,6 +9,9 @@ import ( "github.com/stretchr/testify/require" ) +// Test that CNI fingerprinter is reloadable +var _ ReloadableFingerprint = &CNIFingerprint{} + func TestCNIFingerprint(t *testing.T) { cases := []struct { name string @@ -63,16 +66,19 @@ func TestCNIFingerprint(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { - require := require.New(t) + r := require.New(t) fp := NewCNIFingerprint(testlog.HCLogger(t)) resp := &FingerprintResponse{} err := fp.Fingerprint(c.req, resp) if c.err { - require.Error(err) - require.Contains(err.Error(), c.errMatch) + r.Error(err) + r.Contains(err.Error(), c.errMatch) } else { - require.NoError(err) - require.Exactly(c.exp, resp) + r.NoError(err) + r.Equal(c.exp.Detected, resp.Detected) + if resp.NodeResources != nil || c.exp.NodeResources != nil { + r.ElementsMatch(c.exp.NodeResources.Networks, resp.NodeResources.Networks) + } } }) } diff --git a/client/fingerprint/test_fixtures/cni/net2.conflist b/client/fingerprint/test_fixtures/cni/net2.conflist index 8d14cfcfb968..cec6c67efa95 100644 --- a/client/fingerprint/test_fixtures/cni/net2.conflist +++ b/client/fingerprint/test_fixtures/cni/net2.conflist @@ -19,8 +19,7 @@ "type": "portmap", "capabilities": { "portMappings": true - }, - "externalSetMarkChain": "KUBE-MARK-MASQ" + } } ] } diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index d1dc4afba247..d151a26ba473 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -2238,11 +2238,11 @@ func (nr *NetworkResource) Hash() uint32 { data = append(data, []byte(fmt.Sprintf("%s%s%s%s%d", nr.Mode, nr.Device, nr.CIDR, nr.IP, nr.MBits))...) for i, port := range nr.ReservedPorts { - data = append(data, []byte(fmt.Sprintf("%d%s%d%d", i, port.Label, port.Value, port.To))...) + data = append(data, []byte(fmt.Sprintf("r%d%s%d%d", i, port.Label, port.Value, port.To))...) } for i, port := range nr.DynamicPorts { - data = append(data, []byte(fmt.Sprintf("%d%s%d%d", i, port.Label, port.Value, port.To))...) + data = append(data, []byte(fmt.Sprintf("d%d%s%d%d", i, port.Label, port.Value, port.To))...) } return crc32.ChecksumIEEE(data)