diff --git a/integration/integration_linux_test.go b/integration/integration_linux_test.go index 32d4ccce7..97e73a1ee 100644 --- a/integration/integration_linux_test.go +++ b/integration/integration_linux_test.go @@ -17,6 +17,7 @@ import ( "bytes" "fmt" "io" + "log" "math/rand" "net" "os" @@ -60,6 +61,13 @@ var _ = Describe("Basic PTP using cnitool", func() { netConfPath, err := filepath.Abs("./testdata") Expect(err).NotTo(HaveOccurred()) + // Flush ipam stores to avoid conflicts + err = os.RemoveAll("/tmp/chained-ptp-bandwidth-test") + Expect(err).NotTo(HaveOccurred()) + + err = os.RemoveAll("/tmp/basic-ptp-test") + Expect(err).NotTo(HaveOccurred()) + env = TestEnv([]string{ "CNI_PATH=" + cniPath, "NETCONFPATH=" + netConfPath, @@ -82,6 +90,7 @@ var _ = Describe("Basic PTP using cnitool", func() { env.runInNS(hostNS, cnitoolBin, "add", netName, contNS.LongName()) addrOutput := env.runInNS(contNS, "ip", "addr") + Expect(addrOutput).To(ContainSubstring(expectedIPPrefix)) env.runInNS(hostNS, cnitoolBin, "del", netName, contNS.LongName()) @@ -145,9 +154,13 @@ var _ = Describe("Basic PTP using cnitool", func() { chainedBridgeBandwidthEnv.runInNS(hostNS, cnitoolBin, "del", "network-chain-test", contNS1.LongName()) basicBridgeEnv.runInNS(hostNS, cnitoolBin, "del", "network-chain-test", contNS2.LongName()) + + contNS1.Del() + contNS2.Del() + hostNS.Del() }) - Measure("limits traffic only on the restricted bandwidth veth device", func(b Benchmarker) { + It("limits traffic only on the restricted bandwidth veth device", func() { ipRegexp := regexp.MustCompile(`10\.1[12]\.2\.\d{1,3}`) By(fmt.Sprintf("adding %s to %s\n\n", "chained-bridge-bandwidth", contNS1.ShortName())) @@ -168,21 +181,23 @@ var _ = Describe("Basic PTP using cnitool", func() { By(fmt.Sprintf("starting echo server in %s\n\n", contNS2.ShortName())) basicBridgePort, basicBridgeSession = startEchoServerInNamespace(contNS2) - packetInBytes := 20000 // The shaper needs to 'warm'. Send enough to cause it to throttle, - // balanced by run time. + packetInBytes := 3000 By(fmt.Sprintf("sending tcp traffic to the chained, bridged, traffic shaped container on ip address '%s:%d'\n\n", chainedBridgeIP, chainedBridgeBandwidthPort)) - runtimeWithLimit := b.Time("with chained bridge and bandwidth plugins", func() { - makeTCPClientInNS(hostNS.ShortName(), chainedBridgeIP, chainedBridgeBandwidthPort, packetInBytes) - }) + start := time.Now() + makeTCPClientInNS(hostNS.ShortName(), chainedBridgeIP, chainedBridgeBandwidthPort, packetInBytes) + runtimeWithLimit := time.Since(start) + + log.Printf("Runtime with qos limit %.2f seconds", runtimeWithLimit.Seconds()) By(fmt.Sprintf("sending tcp traffic to the basic bridged container on ip address '%s:%d'\n\n", basicBridgeIP, basicBridgePort)) - runtimeWithoutLimit := b.Time("with basic bridged plugin", func() { - makeTCPClientInNS(hostNS.ShortName(), basicBridgeIP, basicBridgePort, packetInBytes) - }) + start = time.Now() + makeTCPClientInNS(hostNS.ShortName(), basicBridgeIP, basicBridgePort, packetInBytes) + runtimeWithoutLimit := time.Since(start) + log.Printf("Runtime without qos limit %.2f seconds", runtimeWithLimit.Seconds()) Expect(runtimeWithLimit).To(BeNumerically(">", runtimeWithoutLimit+1000*time.Millisecond)) - }, 1) + }) }) }) diff --git a/integration/testdata/basic-ptp.json b/integration/testdata/basic-ptp.json index 11dc3aa61..dc04c5301 100644 --- a/integration/testdata/basic-ptp.json +++ b/integration/testdata/basic-ptp.json @@ -6,6 +6,7 @@ "mtu": 512, "ipam": { "type": "host-local", - "subnet": "10.1.2.0/24" + "subnet": "10.1.2.0/24", + "dataDir": "/tmp/basic-ptp-test" } } diff --git a/integration/testdata/chained-ptp-bandwidth.conflist b/integration/testdata/chained-ptp-bandwidth.conflist index 2510ba882..b7b3d60a7 100644 --- a/integration/testdata/chained-ptp-bandwidth.conflist +++ b/integration/testdata/chained-ptp-bandwidth.conflist @@ -8,7 +8,8 @@ "mtu": 512, "ipam": { "type": "host-local", - "subnet": "10.9.2.0/24" + "subnet": "10.9.2.0/24", + "dataDir": "/tmp/chained-ptp-bandwidth-test" } }, { diff --git a/plugins/meta/bandwidth/bandwidth_linux_test.go b/plugins/meta/bandwidth/bandwidth_linux_test.go index bce274f2f..fae0b2b44 100644 --- a/plugins/meta/bandwidth/bandwidth_linux_test.go +++ b/plugins/meta/bandwidth/bandwidth_linux_test.go @@ -18,8 +18,11 @@ import ( "context" "encoding/json" "fmt" + "log" + "math" "net" "os" + "syscall" "time" . "github.com/onsi/ginkgo/v2" @@ -126,7 +129,7 @@ var _ = Describe("bandwidth test", func() { ver := ver Describe("cmdADD", func() { - It(fmt.Sprintf("[%s] works with a Veth pair", ver), func() { + It(fmt.Sprintf("[%s] works with a Veth pair without any unbounded traffic", ver), func() { conf := fmt.Sprintf(`{ "cniVersion": "%s", "name": "cni-plugin-bandwidth-test", @@ -134,7 +137,7 @@ var _ = Describe("bandwidth test", func() { "ingressRate": 8, "ingressBurst": 8, "egressRate": 16, - "egressBurst": 8, + "egressBurst": 12, "prevResult": { "interfaces": [ { @@ -165,6 +168,7 @@ var _ = Describe("bandwidth test", func() { StdinData: []byte(conf), } + // Container egress (host ingress) Expect(hostNs.Do(func(netNS ns.NetNS) error { defer GinkgoRecover() r, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) @@ -185,10 +189,34 @@ var _ = Describe("bandwidth test", func() { Expect(qdiscs).To(HaveLen(1)) Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) - Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Tbf{})) - Expect(qdiscs[0].(*netlink.Tbf).Rate).To(Equal(uint64(2))) - Expect(qdiscs[0].(*netlink.Tbf).Limit).To(Equal(uint32(1))) + classes, err := netlink.ClassList(ifbLink, qdiscs[0].Attrs().Handle) + + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(2))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(7812500))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(4))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Since we do not exclude anything from egress traffic shapping, we should not find any filter + filters, err := netlink.FilterList(ifbLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(BeEmpty()) hostVethLink, err := netlink.LinkByName(hostIfname) Expect(err).NotTo(HaveOccurred()) @@ -202,56 +230,85 @@ var _ = Describe("bandwidth test", func() { return nil })).To(Succeed()) + // Container ingress (host egress) Expect(hostNs.Do(func(n ns.NetNS) error { defer GinkgoRecover() - ifbLink, err := netlink.LinkByName(hostIfname) + vethLink, err := netlink.LinkByName(hostIfname) Expect(err).NotTo(HaveOccurred()) - qdiscs, err := netlink.QdiscList(ifbLink) + qdiscs, err := netlink.QdiscList(vethLink) Expect(err).NotTo(HaveOccurred()) Expect(qdiscs).To(HaveLen(2)) - Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(vethLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) - Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Tbf{})) - Expect(qdiscs[0].(*netlink.Tbf).Rate).To(Equal(uint64(1))) - Expect(qdiscs[0].(*netlink.Tbf).Limit).To(Equal(uint32(1))) + classes, err := netlink.ClassList(vethLink, qdiscs[0].Attrs().Handle) + + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(1))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(15625000))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(2))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Since we do not exclude anything from ingress traffic shapping, we should not find any filter + filters, err := netlink.FilterList(vethLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(BeEmpty()) return nil })).To(Succeed()) }) - It(fmt.Sprintf("[%s] does not apply ingress when disabled", ver), func() { + It(fmt.Sprintf("[%s] works with a Veth pair with some ipv4 and ipv6 unbounded traffic", ver), func() { conf := fmt.Sprintf(`{ - "cniVersion": "%s", - "name": "cni-plugin-bandwidth-test", - "type": "bandwidth", - "ingressRate": 0, - "ingressBurst": 0, - "egressRate": 8000, - "egressBurst": 80, - "prevResult": { - "interfaces": [ - { - "name": "%s", - "sandbox": "" - }, - { - "name": "%s", - "sandbox": "%s" - } - ], - "ips": [ - { - "version": "4", - "address": "%s/24", - "gateway": "10.0.0.1", - "interface": 1 - } - ], - "routes": [] - } - }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + "cniVersion": "%s", + "name": "cni-plugin-bandwidth-test", + "type": "bandwidth", + "ingressRate": 8, + "ingressBurst": 8, + "egressRate": 16, + "egressBurst": 12, + "nonShapedSubnets": [ + "10.0.0.0/8", + "fd00:db8:abcd:1234:e000::/68" + ], + "prevResult": { + "interfaces": [ + { + "name": "%s", + "sandbox": "" + }, + { + "name": "%s", + "sandbox": "%s" + } + ], + "ips": [ + { + "version": "4", + "address": "%s/24", + "gateway": "10.0.0.1", + "interface": 1 + } + ], + "routes": [] + } + }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) args := &skel.CmdArgs{ ContainerID: "dummy", @@ -260,267 +317,996 @@ var _ = Describe("bandwidth test", func() { StdinData: []byte(conf), } + // Container egress (host ingress) Expect(hostNs.Do(func(netNS ns.NetNS) error { defer GinkgoRecover() - - _, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, ifbDeviceName, []byte(conf), func() error { return cmdAdd(args) }) + r, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) Expect(err).NotTo(HaveOccurred(), string(out)) - - _, err = netlink.LinkByName(ifbDeviceName) + result, err := types100.GetResult(r) Expect(err).NotTo(HaveOccurred()) - return nil - })).To(Succeed()) - Expect(hostNs.Do(func(n ns.NetNS) error { - defer GinkgoRecover() + Expect(result.Interfaces).To(HaveLen(3)) + Expect(result.Interfaces[2].Name).To(Equal(ifbDeviceName)) + Expect(result.Interfaces[2].Sandbox).To(Equal("")) - containerIfLink, err := netlink.LinkByName(hostIfname) + ifbLink, err := netlink.LinkByName(ifbDeviceName) Expect(err).NotTo(HaveOccurred()) + Expect(ifbLink.Attrs().MTU).To(Equal(hostIfaceMTU)) - qdiscs, err := netlink.QdiscList(containerIfLink) + qdiscs, err := netlink.QdiscList(ifbLink) Expect(err).NotTo(HaveOccurred()) - Expect(qdiscs).To(HaveLen(2)) - Expect(qdiscs[0]).NotTo(BeAssignableToTypeOf(&netlink.Tbf{})) - Expect(qdiscs[1]).NotTo(BeAssignableToTypeOf(&netlink.Tbf{})) + Expect(qdiscs).To(HaveLen(1)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) - return nil - })).To(Succeed()) - }) + classes, err := netlink.ClassList(ifbLink, qdiscs[0].Attrs().Handle) - It(fmt.Sprintf("[%s] does not apply egress when disabled", ver), func() { - conf := fmt.Sprintf(`{ - "cniVersion": "%s", - "name": "cni-plugin-bandwidth-test", - "type": "bandwidth", - "egressRate": 0, - "egressBurst": 0, - "ingressRate": 8000, - "ingressBurst": 80, - "prevResult": { - "interfaces": [ - { - "name": "%s", - "sandbox": "" - }, - { - "name": "%s", - "sandbox": "%s" - } - ], - "ips": [ - { - "version": "4", - "address": "%s/24", - "gateway": "10.0.0.1", - "interface": 1 - } - ], - "routes": [] - } - }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(2))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(7812500))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(4))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + filters, err := netlink.FilterList(ifbLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(HaveLen(2)) + + // traffic to fd00:db8:abcd:1234:e000::/68 redirected to uncapped class + Expect(filters[0]).To(BeAssignableToTypeOf(&netlink.U32{})) + Expect(filters[0].(*netlink.U32).Actions).To(BeEmpty()) + Expect(filters[0].Attrs().Protocol).To(Equal(uint16(syscall.ETH_P_IPV6))) + Expect(filters[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(filters[0].Attrs().Priority).To(Equal(uint16(15))) + Expect(filters[0].Attrs().Parent).To(Equal(qdiscs[0].Attrs().Handle)) + Expect(filters[0].(*netlink.U32).ClassId).To(Equal(netlink.MakeHandle(1, 1))) + + filterSel := filters[0].(*netlink.U32).Sel + Expect(filterSel).To(BeAssignableToTypeOf(&netlink.TcU32Sel{})) + Expect(filterSel.Flags).To(Equal(uint8(netlink.TC_U32_TERMINAL))) + Expect(filterSel.Keys).To(HaveLen(3)) + Expect(filterSel.Nkeys).To(Equal(uint8(3))) + + // The filter should match to fd00:db8:abcd:1234:e000::/68 dst address in other words it should be: + // match 0xfd000db8/0xffffffff at 24 + // match 0xabcd1234/0xffffffff at 28 + // match 0xe0000000/0xf0000000 at 32 + // (and last match discarded because it would be equivalent to a matchall/true condition at 36) + Expect(filterSel.Keys[0].Off).To(Equal(int32(24))) + Expect(filterSel.Keys[0].Val).To(Equal(uint32(4244639160))) + Expect(filterSel.Keys[0].Mask).To(Equal(uint32(4294967295))) + + Expect(filterSel.Keys[1].Off).To(Equal(int32(28))) + Expect(filterSel.Keys[1].Val).To(Equal(uint32(2882343476))) + Expect(filterSel.Keys[1].Mask).To(Equal(uint32(4294967295))) + + Expect(filterSel.Keys[2].Off).To(Equal(int32(32))) + Expect(filterSel.Keys[2].Val).To(Equal(uint32(3758096384))) + Expect(filterSel.Keys[2].Mask).To(Equal(uint32(4026531840))) + + // traffic to 10.0.0.0/8 redirected to uncapped class + Expect(filters[1]).To(BeAssignableToTypeOf(&netlink.U32{})) + Expect(filters[1].(*netlink.U32).Actions).To(BeEmpty()) + Expect(filters[1].Attrs().Protocol).To(Equal(uint16(syscall.ETH_P_IP))) + Expect(filters[1].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(filters[1].Attrs().Priority).To(Equal(uint16(16))) + Expect(filters[1].Attrs().Parent).To(Equal(qdiscs[0].Attrs().Handle)) + Expect(filters[1].(*netlink.U32).ClassId).To(Equal(netlink.MakeHandle(1, 1))) + + filterSel = filters[1].(*netlink.U32).Sel + Expect(filterSel).To(BeAssignableToTypeOf(&netlink.TcU32Sel{})) + Expect(filterSel.Flags).To(Equal(uint8(netlink.TC_U32_TERMINAL))) + Expect(filterSel.Keys).To(HaveLen(1)) + Expect(filterSel.Nkeys).To(Equal(uint8(1))) + + // The filter should match to 10.0.0.0/8 dst address in other words it should be: + // match 0a000000/ff000000 at 16 + selKey := filterSel.Keys[0] + Expect(selKey.Val).To(Equal(uint32(10 * math.Pow(256, 3)))) + Expect(selKey.Off).To(Equal(int32(16))) + Expect(selKey.Mask).To(Equal(uint32(255 * math.Pow(256, 3)))) - args := &skel.CmdArgs{ - ContainerID: "dummy", - Netns: containerNs.Path(), - IfName: containerIfname, - StdinData: []byte(conf), - } + hostVethLink, err := netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) - Expect(hostNs.Do(func(netNS ns.NetNS) error { - defer GinkgoRecover() + qdiscFilters, err := netlink.FilterList(hostVethLink, netlink.MakeHandle(0xffff, 0)) + Expect(err).NotTo(HaveOccurred()) - _, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, ifbDeviceName, []byte(conf), func() error { return cmdAdd(args) }) - Expect(err).NotTo(HaveOccurred(), string(out)) + Expect(qdiscFilters).To(HaveLen(1)) + Expect(qdiscFilters[0].(*netlink.U32).Actions[0].(*netlink.MirredAction).Ifindex).To(Equal(ifbLink.Attrs().Index)) - _, err = netlink.LinkByName(ifbDeviceName) - Expect(err).To(HaveOccurred()) return nil })).To(Succeed()) + // Container ingress (host egress) Expect(hostNs.Do(func(n ns.NetNS) error { defer GinkgoRecover() - containerIfLink, err := netlink.LinkByName(hostIfname) + vethLink, err := netlink.LinkByName(hostIfname) Expect(err).NotTo(HaveOccurred()) - qdiscs, err := netlink.QdiscList(containerIfLink) + qdiscs, err := netlink.QdiscList(vethLink) Expect(err).NotTo(HaveOccurred()) - Expect(qdiscs).To(HaveLen(1)) - Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(containerIfLink.Attrs().Index)) + Expect(qdiscs).To(HaveLen(2)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(vethLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) + + classes, err := netlink.ClassList(vethLink, qdiscs[0].Attrs().Handle) + + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(1))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(15625000))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(2))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + filters, err := netlink.FilterList(vethLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(HaveLen(2)) + + // traffic to fd00:db8:abcd:1234:e000::/68 redirected to uncapped class + Expect(filters[0]).To(BeAssignableToTypeOf(&netlink.U32{})) + Expect(filters[0].(*netlink.U32).Actions).To(BeEmpty()) + Expect(filters[0].Attrs().Protocol).To(Equal(uint16(syscall.ETH_P_IPV6))) + Expect(filters[0].Attrs().LinkIndex).To(Equal(vethLink.Attrs().Index)) + Expect(filters[0].Attrs().Priority).To(Equal(uint16(15))) + Expect(filters[0].Attrs().Parent).To(Equal(qdiscs[0].Attrs().Handle)) + Expect(filters[0].(*netlink.U32).ClassId).To(Equal(netlink.MakeHandle(1, 1))) + + filterSel := filters[0].(*netlink.U32).Sel + Expect(filterSel).To(BeAssignableToTypeOf(&netlink.TcU32Sel{})) + Expect(filterSel.Flags).To(Equal(uint8(netlink.TC_U32_TERMINAL))) + Expect(filterSel.Keys).To(HaveLen(3)) + Expect(filterSel.Nkeys).To(Equal(uint8(3))) + + // The filter should match to fd00:db8:abcd:1234:e000::/68 dst address in other words it should be: + // match 0xfd000db8/0xffffffff at 24 + // match 0xabcd1234/0xffffffff at 28 + // match 0xe0000000/0xf0000000 at 32 + // (and last match discarded because it would be equivalent to a matchall/true condition at 36) + Expect(filterSel.Keys[0].Off).To(Equal(int32(24))) + Expect(filterSel.Keys[0].Val).To(Equal(uint32(4244639160))) + Expect(filterSel.Keys[0].Mask).To(Equal(uint32(4294967295))) + + Expect(filterSel.Keys[1].Off).To(Equal(int32(28))) + Expect(filterSel.Keys[1].Val).To(Equal(uint32(2882343476))) + Expect(filterSel.Keys[1].Mask).To(Equal(uint32(4294967295))) + + Expect(filterSel.Keys[2].Off).To(Equal(int32(32))) + Expect(filterSel.Keys[2].Val).To(Equal(uint32(3758096384))) + Expect(filterSel.Keys[2].Mask).To(Equal(uint32(4026531840))) + + // traffic to 10.0.0.0/8 redirected to uncapped class + Expect(filters[1]).To(BeAssignableToTypeOf(&netlink.U32{})) + Expect(filters[1].(*netlink.U32).Actions).To(BeEmpty()) + Expect(filters[1].Attrs().Protocol).To(Equal(uint16(syscall.ETH_P_IP))) + Expect(filters[1].Attrs().LinkIndex).To(Equal(vethLink.Attrs().Index)) + Expect(filters[1].Attrs().Priority).To(Equal(uint16(16))) + Expect(filters[1].Attrs().Parent).To(Equal(qdiscs[0].Attrs().Handle)) + Expect(filters[1].(*netlink.U32).ClassId).To(Equal(netlink.MakeHandle(1, 1))) + + filterSel = filters[1].(*netlink.U32).Sel + Expect(filterSel).To(BeAssignableToTypeOf(&netlink.TcU32Sel{})) + Expect(filterSel.Flags).To(Equal(uint8(netlink.TC_U32_TERMINAL))) + Expect(filterSel.Keys).To(HaveLen(1)) + Expect(filterSel.Nkeys).To(Equal(uint8(1))) + + // The filter should match to 10.0.0.0/8 dst address in other words it should be: + // match 0a000000/ff000000 at 16 + selKey := filterSel.Keys[0] + Expect(selKey.Val).To(Equal(uint32(10 * math.Pow(256, 3)))) + Expect(selKey.Off).To(Equal(int32(16))) + Expect(selKey.Mask).To(Equal(uint32(255 * math.Pow(256, 3)))) - Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Tbf{})) - Expect(qdiscs[0].(*netlink.Tbf).Rate).To(Equal(uint64(1000))) - Expect(qdiscs[0].(*netlink.Tbf).Limit).To(Equal(uint32(35))) return nil })).To(Succeed()) }) + }) - It(fmt.Sprintf("[%s] fails an invalid ingress config", ver), func() { - conf := fmt.Sprintf(`{ - "cniVersion": "%s", - "name": "cni-plugin-bandwidth-test", - "type": "bandwidth", - "ingressRate": 0, - "ingressBurst": 123, - "egressRate": 123, - "egressBurst": 123, - "prevResult": { - "interfaces": [ - { - "name": "%s", - "sandbox": "" - }, - { - "name": "%s", - "sandbox": "%s" - } - ], - "ips": [ - { - "version": "4", - "address": "%s/24", - "gateway": "10.0.0.1", - "interface": 1 - } - ], - "routes": [] + It(fmt.Sprintf("[%s] does not apply ingress when disabled", ver), func() { + conf := fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "cni-plugin-bandwidth-test", + "type": "bandwidth", + "ingressRate": 0, + "ingressBurst": 0, + "egressRate": 8000, + "egressBurst": 80, + "nonShapedSubnets": [ + "10.0.0.0/8", + "fd00:db8:abcd:1234:e000::/68" + ], + "prevResult": { + "interfaces": [ + { + "name": "%s", + "sandbox": "" + }, + { + "name": "%s", + "sandbox": "%s" } - }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + ], + "ips": [ + { + "version": "4", + "address": "%s/24", + "gateway": "10.0.0.1", + "interface": 1 + } + ], + "routes": [] + } + }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + + args := &skel.CmdArgs{ + ContainerID: "dummy", + Netns: containerNs.Path(), + IfName: containerIfname, + StdinData: []byte(conf), + } + + // check container egress side / host ingress side, we expect to get some QoS setup there + Expect(hostNs.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + + _, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, ifbDeviceName, []byte(conf), func() error { return cmdAdd(args) }) + Expect(err).NotTo(HaveOccurred(), string(out)) + + ifbLink, err := netlink.LinkByName(ifbDeviceName) + Expect(err).NotTo(HaveOccurred()) - args := &skel.CmdArgs{ - ContainerID: "dummy", - Netns: containerNs.Path(), - IfName: "eth0", - StdinData: []byte(conf), - } + qdiscs, err := netlink.QdiscList(ifbLink) + Expect(err).NotTo(HaveOccurred()) - Expect(hostNs.Do(func(netNS ns.NetNS) error { - defer GinkgoRecover() + Expect(qdiscs).To(HaveLen(1)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) + classes, err := netlink.ClassList(ifbLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(DefaultClassMinorID)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(1000))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(7812500))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(2000))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + filters, err := netlink.FilterList(ifbLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(HaveLen(2)) + + // traffic to fd00:db8:abcd:1234:e000::/68 redirected to uncapped class + Expect(filters[0]).To(BeAssignableToTypeOf(&netlink.U32{})) + Expect(filters[0].(*netlink.U32).Actions).To(BeEmpty()) + Expect(filters[0].Attrs().Protocol).To(Equal(uint16(syscall.ETH_P_IPV6))) + Expect(filters[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(filters[0].Attrs().Priority).To(Equal(uint16(15))) + Expect(filters[0].Attrs().Parent).To(Equal(qdiscs[0].Attrs().Handle)) + Expect(filters[0].(*netlink.U32).ClassId).To(Equal(netlink.MakeHandle(1, 1))) + + filterSel := filters[0].(*netlink.U32).Sel + Expect(filterSel).To(BeAssignableToTypeOf(&netlink.TcU32Sel{})) + Expect(filterSel.Flags).To(Equal(uint8(netlink.TC_U32_TERMINAL))) + Expect(filterSel.Keys).To(HaveLen(3)) + Expect(filterSel.Nkeys).To(Equal(uint8(3))) + + // The filter should match to fd00:db8:abcd:1234:e000::/68 dst address in other words it should be: + // match 0xfd000db8/0xffffffff at 24 + // match 0xabcd1234/0xffffffff at 28 + // match 0xe0000000/0xf0000000 at 32 + // (and last match discarded because it would be equivalent to a matchall/true condition at 36) + Expect(filterSel.Keys[0].Off).To(Equal(int32(24))) + Expect(filterSel.Keys[0].Val).To(Equal(uint32(4244639160))) + Expect(filterSel.Keys[0].Mask).To(Equal(uint32(4294967295))) + + Expect(filterSel.Keys[1].Off).To(Equal(int32(28))) + Expect(filterSel.Keys[1].Val).To(Equal(uint32(2882343476))) + Expect(filterSel.Keys[1].Mask).To(Equal(uint32(4294967295))) + + Expect(filterSel.Keys[2].Off).To(Equal(int32(32))) + Expect(filterSel.Keys[2].Val).To(Equal(uint32(3758096384))) + Expect(filterSel.Keys[2].Mask).To(Equal(uint32(4026531840))) + + // traffic to 10.0.0.0/8 redirected to uncapped class + Expect(filters[1]).To(BeAssignableToTypeOf(&netlink.U32{})) + Expect(filters[1].(*netlink.U32).Actions).To(BeEmpty()) + Expect(filters[1].Attrs().Protocol).To(Equal(uint16(syscall.ETH_P_IP))) + Expect(filters[1].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(filters[1].Attrs().Priority).To(Equal(uint16(16))) + Expect(filters[1].Attrs().Parent).To(Equal(qdiscs[0].Attrs().Handle)) + Expect(filters[1].(*netlink.U32).ClassId).To(Equal(netlink.MakeHandle(1, 1))) + + filterSel = filters[1].(*netlink.U32).Sel + Expect(filterSel).To(BeAssignableToTypeOf(&netlink.TcU32Sel{})) + Expect(filterSel.Flags).To(Equal(uint8(netlink.TC_U32_TERMINAL))) + Expect(filterSel.Keys).To(HaveLen(1)) + Expect(filterSel.Nkeys).To(Equal(uint8(1))) + + // The filter should match to 10.0.0.0/8 dst address in other words it should be: + // match 0a000000/ff000000 at 16 + selKey := filterSel.Keys[0] + Expect(selKey.Val).To(Equal(uint32(10 * math.Pow(256, 3)))) + Expect(selKey.Off).To(Equal(int32(16))) + Expect(selKey.Mask).To(Equal(uint32(255 * math.Pow(256, 3)))) + + // check traffic mirroring from veth to ifb + hostVethLink, err := netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) - _, _, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) - Expect(err).To(MatchError("if burst is set, rate must also be set")) - return nil - })).To(Succeed()) - }) + qdiscFilters, err := netlink.FilterList(hostVethLink, netlink.MakeHandle(0xffff, 0)) + Expect(err).NotTo(HaveOccurred()) - It(fmt.Sprintf("[%s] works with a Veth pair using runtime config", ver), func() { - conf := fmt.Sprintf(`{ - "cniVersion": "%s", - "name": "cni-plugin-bandwidth-test", - "type": "bandwidth", - "runtimeConfig": { - "bandWidth": { - "ingressRate": 8, - "ingressBurst": 8, - "egressRate": 16, - "egressBurst": 9 + Expect(qdiscFilters).To(HaveLen(1)) + Expect(qdiscFilters[0].(*netlink.U32).Actions[0].(*netlink.MirredAction).Ifindex).To(Equal(ifbLink.Attrs().Index)) + + return nil + })).To(Succeed()) + + // check container ingress side / host egress side, we should not have any htb qdisc/classes/filters defined for the host veth + // only the qdisc ingress + a noqueue qdisc + Expect(hostNs.Do(func(n ns.NetNS) error { + defer GinkgoRecover() + + containerIfLink, err := netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) + + qdiscs, err := netlink.QdiscList(containerIfLink) + Expect(err).NotTo(HaveOccurred()) + + Expect(qdiscs).To(HaveLen(2)) + Expect(qdiscs[0]).NotTo(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[1]).NotTo(BeAssignableToTypeOf(&netlink.Htb{})) + + return nil + })).To(Succeed()) + }) + + It(fmt.Sprintf("[%s] does not apply egress when disabled", ver), func() { + conf := fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "cni-plugin-bandwidth-test", + "type": "bandwidth", + "egressRate": 0, + "egressBurst": 0, + "ingressRate": 8000, + "ingressBurst": 80, + "prevResult": { + "interfaces": [ + { + "name": "%s", + "sandbox": "" + }, + { + "name": "%s", + "sandbox": "%s" + } + ], + "ips": [ + { + "version": "4", + "address": "%s/24", + "gateway": "10.0.0.1", + "interface": 1 } + ], + "routes": [] + } + }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + + args := &skel.CmdArgs{ + ContainerID: "dummy", + Netns: containerNs.Path(), + IfName: containerIfname, + StdinData: []byte(conf), + } + + Expect(hostNs.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + + _, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, ifbDeviceName, []byte(conf), func() error { return cmdAdd(args) }) + Expect(err).NotTo(HaveOccurred(), string(out)) + + // Since we do not setup any egress QoS, no ifb interface should be created at all + _, err = netlink.LinkByName(ifbDeviceName) + Expect(err).To(HaveOccurred()) + + return nil + })).To(Succeed()) + + Expect(hostNs.Do(func(n ns.NetNS) error { + defer GinkgoRecover() + + containerIfLink, err := netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) + + // Only one qdisc should be found this time, no ingress qdisc should be there + qdiscs, err := netlink.QdiscList(containerIfLink) + Expect(err).NotTo(HaveOccurred()) + + Expect(qdiscs).To(HaveLen(1)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(containerIfLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) + + classes, err := netlink.ClassList(containerIfLink, qdiscs[0].Attrs().Handle) + + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(1000))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(15625000))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(2000))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // No subnets are exluded in this example so we should not get any filter + filters, err := netlink.FilterList(containerIfLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(BeEmpty()) + + // Just check no mirroring is setup + qdiscFilters, err := netlink.FilterList(containerIfLink, netlink.MakeHandle(0xffff, 0)) + Expect(err).NotTo(HaveOccurred()) + Expect(qdiscFilters).To(BeEmpty()) + return nil + })).To(Succeed()) + }) + + It(fmt.Sprintf("[%s] fails an invalid ingress config", ver), func() { + conf := fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "cni-plugin-bandwidth-test", + "type": "bandwidth", + "ingressRate": 0, + "ingressBurst": 123, + "egressRate": 123, + "egressBurst": 123, + "prevResult": { + "interfaces": [ + { + "name": "%s", + "sandbox": "" }, - "prevResult": { - "interfaces": [ - { - "name": "%s", - "sandbox": "" - }, - { - "name": "%s", - "sandbox": "%s" - } - ], - "ips": [ - { - "version": "4", - "address": "%s/24", - "gateway": "10.0.0.1", - "interface": 1 - } - ], - "routes": [] + { + "name": "%s", + "sandbox": "%s" } - }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + ], + "ips": [ + { + "version": "4", + "address": "%s/24", + "gateway": "10.0.0.1", + "interface": 1 + } + ], + "routes": [] + } + }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + + args := &skel.CmdArgs{ + ContainerID: "dummy", + Netns: containerNs.Path(), + IfName: "eth0", + StdinData: []byte(conf), + } + + Expect(hostNs.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + + _, _, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) + Expect(err).To(MatchError("if burst is set, rate must also be set")) + return nil + })).To(Succeed()) + }) - args := &skel.CmdArgs{ - ContainerID: "dummy", - Netns: containerNs.Path(), - IfName: containerIfname, - StdinData: []byte(conf), + It(fmt.Sprintf("[%s] fails an invalid egress config", ver), func() { + conf := fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "cni-plugin-bandwidth-test", + "type": "bandwidth", + "ingressRate": 123, + "ingressBurst": 123, + "egressRate": 0, + "egressBurst": 123, + "prevResult": { + "interfaces": [ + { + "name": "%s", + "sandbox": "" + }, + { + "name": "%s", + "sandbox": "%s" + } + ], + "ips": [ + { + "version": "4", + "address": "%s/24", + "gateway": "10.0.0.1", + "interface": 1 + } + ], + "routes": [] + } + }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + + args := &skel.CmdArgs{ + ContainerID: "dummy", + Netns: containerNs.Path(), + IfName: "eth0", + StdinData: []byte(conf), + } + + Expect(hostNs.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + + _, _, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) + Expect(err).To(MatchError("if burst is set, rate must also be set")) + return nil + })).To(Succeed()) + }) + + It(fmt.Sprintf("[%s] works with a Veth pair using runtime config", ver), func() { + conf := fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "cni-plugin-bandwidth-test", + "type": "bandwidth", + "runtimeConfig": { + "bandWidth": { + "ingressRate": 8, + "ingressBurst": 8, + "egressRate": 16, + "egressBurst": 9, + "nonShapedSubnets": ["192.168.0.0/24"] } + }, + "prevResult": { + "interfaces": [ + { + "name": "%s", + "sandbox": "" + }, + { + "name": "%s", + "sandbox": "%s" + } + ], + "ips": [ + { + "version": "4", + "address": "%s/24", + "gateway": "10.0.0.1", + "interface": 1 + } + ], + "routes": [] + } + }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + + args := &skel.CmdArgs{ + ContainerID: "dummy", + Netns: containerNs.Path(), + IfName: containerIfname, + StdinData: []byte(conf), + } + + Expect(hostNs.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + r, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) + Expect(err).NotTo(HaveOccurred(), string(out)) + result, err := types100.GetResult(r) + Expect(err).NotTo(HaveOccurred()) - Expect(hostNs.Do(func(netNS ns.NetNS) error { - defer GinkgoRecover() - r, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) - Expect(err).NotTo(HaveOccurred(), string(out)) - result, err := types100.GetResult(r) - Expect(err).NotTo(HaveOccurred()) + Expect(result.Interfaces).To(HaveLen(3)) + Expect(result.Interfaces[2].Name).To(Equal(ifbDeviceName)) + Expect(result.Interfaces[2].Sandbox).To(Equal("")) - Expect(result.Interfaces).To(HaveLen(3)) - Expect(result.Interfaces[2].Name).To(Equal(ifbDeviceName)) - Expect(result.Interfaces[2].Sandbox).To(Equal("")) + ifbLink, err := netlink.LinkByName(ifbDeviceName) + Expect(err).NotTo(HaveOccurred()) + Expect(ifbLink.Attrs().MTU).To(Equal(hostIfaceMTU)) - ifbLink, err := netlink.LinkByName(ifbDeviceName) - Expect(err).NotTo(HaveOccurred()) - Expect(ifbLink.Attrs().MTU).To(Equal(hostIfaceMTU)) + qdiscs, err := netlink.QdiscList(ifbLink) + Expect(err).NotTo(HaveOccurred()) - qdiscs, err := netlink.QdiscList(ifbLink) - Expect(err).NotTo(HaveOccurred()) + Expect(qdiscs).To(HaveLen(1)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) - Expect(qdiscs).To(HaveLen(1)) - Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + classes, err := netlink.ClassList(ifbLink, qdiscs[0].Attrs().Handle) - Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Tbf{})) - Expect(qdiscs[0].(*netlink.Tbf).Rate).To(Equal(uint64(2))) - Expect(qdiscs[0].(*netlink.Tbf).Limit).To(Equal(uint32(1))) + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(2))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(7812500))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(4))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + filters, err := netlink.FilterList(ifbLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(HaveLen(1)) + + // traffic to 192.168.0.0/24 redirected to uncapped class + Expect(filters[0]).To(BeAssignableToTypeOf(&netlink.U32{})) + Expect(filters[0].(*netlink.U32).Actions).To(BeEmpty()) + Expect(filters[0].Attrs().Protocol).To(Equal(uint16(syscall.ETH_P_IP))) + Expect(filters[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(filters[0].Attrs().Priority).To(Equal(uint16(16))) + Expect(filters[0].Attrs().Parent).To(Equal(qdiscs[0].Attrs().Handle)) + Expect(filters[0].(*netlink.U32).ClassId).To(Equal(netlink.MakeHandle(1, 1))) + + filterSel := filters[0].(*netlink.U32).Sel + Expect(filterSel).To(BeAssignableToTypeOf(&netlink.TcU32Sel{})) + Expect(filterSel.Flags).To(Equal(uint8(netlink.TC_U32_TERMINAL))) + Expect(filterSel.Keys).To(HaveLen(1)) + Expect(filterSel.Nkeys).To(Equal(uint8(1))) + + // The filter should match to 192.168.0.0/24 dst address in other words it should be: + // match c0a80000/ffffff00 at 16 + selKey := filterSel.Keys[0] + Expect(selKey.Val).To(Equal(uint32(192*math.Pow(256, 3) + 168*math.Pow(256, 2)))) + Expect(selKey.Off).To(Equal(int32(16))) + Expect(selKey.Mask).To(Equal(uint32(255*math.Pow(256, 3) + 255*math.Pow(256, 2) + 255*256))) + + hostVethLink, err := netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) - hostVethLink, err := netlink.LinkByName(hostIfname) - Expect(err).NotTo(HaveOccurred()) + qdiscFilters, err := netlink.FilterList(hostVethLink, netlink.MakeHandle(0xffff, 0)) + Expect(err).NotTo(HaveOccurred()) - qdiscFilters, err := netlink.FilterList(hostVethLink, netlink.MakeHandle(0xffff, 0)) - Expect(err).NotTo(HaveOccurred()) + Expect(qdiscFilters).To(HaveLen(1)) + Expect(qdiscFilters[0].(*netlink.U32).Actions[0].(*netlink.MirredAction).Ifindex).To(Equal(ifbLink.Attrs().Index)) - Expect(qdiscFilters).To(HaveLen(1)) - Expect(qdiscFilters[0].(*netlink.U32).Actions[0].(*netlink.MirredAction).Ifindex).To(Equal(ifbLink.Attrs().Index)) + return nil + })).To(Succeed()) - return nil - })).To(Succeed()) + // Container ingress (host egress) + Expect(hostNs.Do(func(n ns.NetNS) error { + defer GinkgoRecover() - Expect(hostNs.Do(func(n ns.NetNS) error { - defer GinkgoRecover() + vethLink, err := netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) - ifbLink, err := netlink.LinkByName(hostIfname) - Expect(err).NotTo(HaveOccurred()) + qdiscs, err := netlink.QdiscList(vethLink) + Expect(err).NotTo(HaveOccurred()) - qdiscs, err := netlink.QdiscList(ifbLink) - Expect(err).NotTo(HaveOccurred()) + Expect(qdiscs).To(HaveLen(2)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(vethLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) - Expect(qdiscs).To(HaveLen(2)) - Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + classes, err := netlink.ClassList(vethLink, qdiscs[0].Attrs().Handle) - Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Tbf{})) - Expect(qdiscs[0].(*netlink.Tbf).Rate).To(Equal(uint64(1))) - Expect(qdiscs[0].(*netlink.Tbf).Limit).To(Equal(uint32(1))) - return nil - })).To(Succeed()) - }) + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(1))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(15625000))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(2))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + filters, err := netlink.FilterList(vethLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(HaveLen(1)) + + // traffic to 192.168.0.0/24 redirected to uncapped class + Expect(filters[0]).To(BeAssignableToTypeOf(&netlink.U32{})) + Expect(filters[0].(*netlink.U32).Actions).To(BeEmpty()) + Expect(filters[0].Attrs().Protocol).To(Equal(uint16(syscall.ETH_P_IP))) + Expect(filters[0].Attrs().LinkIndex).To(Equal(vethLink.Attrs().Index)) + Expect(filters[0].Attrs().Priority).To(Equal(uint16(16))) + Expect(filters[0].Attrs().Parent).To(Equal(qdiscs[0].Attrs().Handle)) + Expect(filters[0].(*netlink.U32).ClassId).To(Equal(netlink.MakeHandle(1, 1))) + + filterSel := filters[0].(*netlink.U32).Sel + Expect(filterSel).To(BeAssignableToTypeOf(&netlink.TcU32Sel{})) + Expect(filterSel.Flags).To(Equal(uint8(netlink.TC_U32_TERMINAL))) + Expect(filterSel.Keys).To(HaveLen(1)) + Expect(filterSel.Nkeys).To(Equal(uint8(1))) + + // The filter should match to 192.168.0.0/24 dst address in other words it should be: + // match c0a80000/ffffff00 at 16 + selKey := filterSel.Keys[0] + Expect(selKey.Val).To(Equal(uint32(192*math.Pow(256, 3) + 168*math.Pow(256, 2)))) + Expect(selKey.Off).To(Equal(int32(16))) + Expect(selKey.Mask).To(Equal(uint32(255*math.Pow(256, 3) + 255*math.Pow(256, 2) + 255*256))) + return nil + })).To(Succeed()) + }) + + // Runtime config parameters are expected to be preempted by the global config ones whenever specified + It(fmt.Sprintf("[%s] should apply static config when both static config and runtime config exist", ver), func() { + conf := fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "cni-plugin-bandwidth-test", + "type": "bandwidth", + "ingressRate": 0, + "ingressBurst": 0, + "egressRate": 123, + "egressBurst": 123, + "nonShapedSubnets": ["192.168.0.0/24"], + "runtimeConfig": { + "bandWidth": { + "ingressRate": 8, + "ingressBurst": 8, + "egressRate": 16, + "egressBurst": 9, + "nonShapedSubnets": ["10.0.0.0/8", "fd00:db8:abcd:1234:e000::/68"] + } + }, + "prevResult": { + "interfaces": [ + { + "name": "%s", + "sandbox": "" + }, + { + "name": "%s", + "sandbox": "%s" + } + ], + "ips": [ + { + "version": "4", + "address": "%s/24", + "gateway": "10.0.0.1", + "interface": 1 + } + ], + "routes": [] + } + }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + + args := &skel.CmdArgs{ + ContainerID: "dummy", + Netns: containerNs.Path(), + IfName: containerIfname, + StdinData: []byte(conf), + } + + Expect(hostNs.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + r, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) + Expect(err).NotTo(HaveOccurred(), string(out)) + result, err := types100.GetResult(r) + Expect(err).NotTo(HaveOccurred()) + + Expect(result.Interfaces).To(HaveLen(3)) + Expect(result.Interfaces[2].Name).To(Equal(ifbDeviceName)) + Expect(result.Interfaces[2].Sandbox).To(Equal("")) + + ifbLink, err := netlink.LinkByName(ifbDeviceName) + Expect(err).NotTo(HaveOccurred()) + Expect(ifbLink.Attrs().MTU).To(Equal(hostIfaceMTU)) + + qdiscs, err := netlink.QdiscList(ifbLink) + Expect(err).NotTo(HaveOccurred()) + + Expect(qdiscs).To(HaveLen(1)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) + + classes, err := netlink.ClassList(ifbLink, qdiscs[0].Attrs().Handle) + + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(15))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(7812500))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(30))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + filters, err := netlink.FilterList(ifbLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(HaveLen(1)) + + // traffic to 192.168.0.0/24 redirected to uncapped class + Expect(filters[0]).To(BeAssignableToTypeOf(&netlink.U32{})) + Expect(filters[0].(*netlink.U32).Actions).To(BeEmpty()) + Expect(filters[0].Attrs().Protocol).To(Equal(uint16(syscall.ETH_P_IP))) + Expect(filters[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(filters[0].Attrs().Priority).To(Equal(uint16(16))) + Expect(filters[0].Attrs().Parent).To(Equal(qdiscs[0].Attrs().Handle)) + Expect(filters[0].(*netlink.U32).ClassId).To(Equal(netlink.MakeHandle(1, 1))) + + filterSel := filters[0].(*netlink.U32).Sel + Expect(filterSel).To(BeAssignableToTypeOf(&netlink.TcU32Sel{})) + Expect(filterSel.Flags).To(Equal(uint8(netlink.TC_U32_TERMINAL))) + Expect(filterSel.Keys).To(HaveLen(1)) + Expect(filterSel.Nkeys).To(Equal(uint8(1))) + + // The filter should match to 192.168.0.0/24 dst address in other words it should be: + // match c0a80000/ffffff00 at 16 + selKey := filterSel.Keys[0] + Expect(selKey.Val).To(Equal(uint32(192*math.Pow(256, 3) + 168*math.Pow(256, 2)))) + Expect(selKey.Off).To(Equal(int32(16))) + Expect(selKey.Mask).To(Equal(uint32(255*math.Pow(256, 3) + 255*math.Pow(256, 2) + 255*256))) + + hostVethLink, err := netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) + + qdiscFilters, err := netlink.FilterList(hostVethLink, netlink.MakeHandle(0xffff, 0)) + Expect(err).NotTo(HaveOccurred()) + + Expect(qdiscFilters).To(HaveLen(1)) + Expect(qdiscFilters[0].(*netlink.U32).Actions[0].(*netlink.MirredAction).Ifindex).To(Equal(ifbLink.Attrs().Index)) + + return nil + })).To(Succeed()) + + // Container ingress (host egress) + Expect(hostNs.Do(func(n ns.NetNS) error { + defer GinkgoRecover() + + vethLink, err := netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) + + qdiscs, err := netlink.QdiscList(vethLink) + Expect(err).NotTo(HaveOccurred()) + + // No ingress QoS just mirroring + Expect(qdiscs).To(HaveLen(2)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(vethLink.Attrs().Index)) + Expect(qdiscs[0]).NotTo(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[1]).NotTo(BeAssignableToTypeOf(&netlink.Htb{})) + + return nil + })).To(Succeed()) + }) + + It(fmt.Sprintf("[%s] should apply static config when both static config and runtime config exist (bad config example)", ver), func() { + conf := fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "cni-plugin-bandwidth-test", + "type": "bandwidth", + "ingressRate": 0, + "ingressBurst": 123, + "egressRate": 123, + "egressBurst": 123, + "runtimeConfig": { + "bandWidth": { + "ingressRate": 8, + "ingressBurst": 8, + "egressRate": 16, + "egressBurst": 9 + } + }, + "prevResult": { + "interfaces": [ + { + "name": "%s", + "sandbox": "" + }, + { + "name": "%s", + "sandbox": "%s" + } + ], + "ips": [ + { + "version": "4", + "address": "%s/24", + "gateway": "10.0.0.1", + "interface": 1 + } + ], + "routes": [] + } + }`, ver, hostIfname, containerIfname, containerNs.Path(), containerIP.String()) + + args := &skel.CmdArgs{ + ContainerID: "dummy", + Netns: containerNs.Path(), + IfName: "eth0", + StdinData: []byte(conf), + } + + Expect(hostNs.Do(func(netNS ns.NetNS) error { + defer GinkgoRecover() + + _, _, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) + Expect(err).To(MatchError("if burst is set, rate must also be set")) + return nil + })).To(Succeed()) + }) - It(fmt.Sprintf("[%s] should apply static config when both static config and runtime config exist", ver), func() { + Describe("cmdDEL", func() { + It(fmt.Sprintf("[%s] works with a Veth pair using 0.3.0 config", ver), func() { conf := fmt.Sprintf(`{ "cniVersion": "%s", "name": "cni-plugin-bandwidth-test", "type": "bandwidth", - "ingressRate": 0, - "ingressBurst": 123, - "egressRate": 123, - "egressBurst": 123, - "runtimeConfig": { - "bandWidth": { - "ingressRate": 8, - "ingressBurst": 8, - "egressRate": 16, - "egressBurst": 9 - } - }, + "ingressRate": 8, + "ingressBurst": 8, + "egressRate": 9, + "egressBurst": 9, "prevResult": { "interfaces": [ { @@ -547,22 +1333,38 @@ var _ = Describe("bandwidth test", func() { args := &skel.CmdArgs{ ContainerID: "dummy", Netns: containerNs.Path(), - IfName: "eth0", + IfName: containerIfname, StdinData: []byte(conf), } Expect(hostNs.Do(func(netNS ns.NetNS) error { defer GinkgoRecover() + _, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) + Expect(err).NotTo(HaveOccurred(), string(out)) + + _, err = netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) + + _, err = netlink.LinkByName(ifbDeviceName) + Expect(err).NotTo(HaveOccurred()) + + err = testutils.CmdDel(containerNs.Path(), args.ContainerID, "", func() error { return cmdDel(args) }) + Expect(err).NotTo(HaveOccurred(), string(out)) + + _, err = netlink.LinkByName(ifbDeviceName) + Expect(err).To(HaveOccurred()) + + // The host veth peer should remain as it has not be created by this plugin + _, err = netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) - _, _, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) - Expect(err).To(MatchError("if burst is set, rate must also be set")) return nil })).To(Succeed()) }) }) - Describe("cmdDEL", func() { - It(fmt.Sprintf("[%s] works with a Veth pair using 0.3.0 config", ver), func() { + Describe("cmdCHECK", func() { + It(fmt.Sprintf("[%s] works with a Veth pair", ver), func() { conf := fmt.Sprintf(`{ "cniVersion": "%s", "name": "cni-plugin-bandwidth-test", @@ -606,12 +1408,29 @@ var _ = Describe("bandwidth test", func() { _, out, err := testutils.CmdAdd(containerNs.Path(), args.ContainerID, "", []byte(conf), func() error { return cmdAdd(args) }) Expect(err).NotTo(HaveOccurred(), string(out)) + _, err = netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) + + _, err = netlink.LinkByName(ifbDeviceName) + Expect(err).NotTo(HaveOccurred()) + + if testutils.SpecVersionHasCHECK(ver) { + // Do CNI Check + + err = testutils.CmdCheck(containerNs.Path(), args.ContainerID, "", func() error { return cmdCheck(args) }) + Expect(err).NotTo(HaveOccurred()) + } + err = testutils.CmdDel(containerNs.Path(), args.ContainerID, "", func() error { return cmdDel(args) }) Expect(err).NotTo(HaveOccurred(), string(out)) _, err = netlink.LinkByName(ifbDeviceName) Expect(err).To(HaveOccurred()) + // The host veth peer should remain as it has not be created by this plugin + _, err = netlink.LinkByName(hostIfname) + Expect(err).NotTo(HaveOccurred()) + return nil })).To(Succeed()) }) @@ -689,10 +1508,34 @@ var _ = Describe("bandwidth test", func() { Expect(qdiscs).To(HaveLen(1)) Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) - Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Tbf{})) - Expect(qdiscs[0].(*netlink.Tbf).Rate).To(Equal(uint64(2))) - Expect(qdiscs[0].(*netlink.Tbf).Limit).To(Equal(uint32(1))) + classes, err := netlink.ClassList(ifbLink, qdiscs[0].Attrs().Handle) + + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(2))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(7812500))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(4))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Since we do not exclude anything from egress traffic shapping, we should not find any filter + filters, err := netlink.FilterList(ifbLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(BeEmpty()) hostVethLink, err := netlink.LinkByName(hostIfname) Expect(err).NotTo(HaveOccurred()) @@ -709,18 +1552,43 @@ var _ = Describe("bandwidth test", func() { Expect(hostNs.Do(func(n ns.NetNS) error { defer GinkgoRecover() - ifbLink, err := netlink.LinkByName(hostIfname) + vethLink, err := netlink.LinkByName(hostIfname) Expect(err).NotTo(HaveOccurred()) - qdiscs, err := netlink.QdiscList(ifbLink) + qdiscs, err := netlink.QdiscList(vethLink) Expect(err).NotTo(HaveOccurred()) Expect(qdiscs).To(HaveLen(2)) - Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(ifbLink.Attrs().Index)) + Expect(qdiscs[0].Attrs().LinkIndex).To(Equal(vethLink.Attrs().Index)) + Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Htb{})) + Expect(qdiscs[0].(*netlink.Htb).Defcls).To(Equal(uint32(DefaultClassMinorID))) + + classes, err := netlink.ClassList(vethLink, qdiscs[0].Attrs().Handle) + + Expect(err).NotTo(HaveOccurred()) + Expect(classes).To(HaveLen(2)) + + // Uncapped class + Expect(classes[0]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[0].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, 1))) + Expect(classes[0].(*netlink.HtbClass).Rate).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Buffer).To(Equal(uint32(0))) + Expect(classes[0].(*netlink.HtbClass).Ceil).To(Equal(UncappedRate)) + Expect(classes[0].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Class with traffic shapping settings + Expect(classes[1]).To(BeAssignableToTypeOf(&netlink.HtbClass{})) + Expect(classes[1].(*netlink.HtbClass).Handle).To(Equal(netlink.MakeHandle(1, uint16(qdiscs[0].(*netlink.Htb).Defcls)))) + Expect(classes[1].(*netlink.HtbClass).Rate).To(Equal(uint64(1))) + // Expect(classes[1].(*netlink.HtbClass).Buffer).To(Equal(uint32(15625000))) + Expect(classes[1].(*netlink.HtbClass).Ceil).To(Equal(uint64(2))) + // Expect(classes[1].(*netlink.HtbClass).Cbuffer).To(Equal(uint32(0))) + + // Since we do not exclude anything from ingress traffic shapping, we should not find any filter + filters, err := netlink.FilterList(vethLink, qdiscs[0].Attrs().Handle) + Expect(err).NotTo(HaveOccurred()) + Expect(filters).To(BeEmpty()) - Expect(qdiscs[0]).To(BeAssignableToTypeOf(&netlink.Tbf{})) - Expect(qdiscs[0].(*netlink.Tbf).Rate).To(Equal(uint64(1))) - Expect(qdiscs[0].(*netlink.Tbf).Limit).To(Equal(uint32(1))) return nil })).To(Succeed()) }) @@ -864,182 +1732,190 @@ var _ = Describe("bandwidth test", func() { }) }) - Context(fmt.Sprintf("[%s] when chaining bandwidth plugin with PTP", ver), func() { - var ptpConf string - var rateInBits uint64 - var burstInBits uint64 - var packetInBytes int - var containerWithoutTbfNS ns.NetNS - var containerWithTbfNS ns.NetNS - var portServerWithTbf int - var portServerWithoutTbf int - - var containerWithTbfRes types.Result - var containerWithoutTbfRes types.Result - var echoServerWithTbf *gexec.Session - var echoServerWithoutTbf *gexec.Session - var dataDir string - - BeforeEach(func() { - rateInBytes := 1000 - rateInBits = uint64(rateInBytes * 8) - burstInBits = rateInBits * 2 - packetInBytes = rateInBytes * 25 - - var err error - dataDir, err = os.MkdirTemp("", "bandwidth_linux_test") - Expect(err).NotTo(HaveOccurred()) - - ptpConf = fmt.Sprintf(`{ - "cniVersion": "%s", - "name": "myBWnet", - "type": "ptp", - "ipMasq": true, - "mtu": 512, - "ipam": { - "type": "host-local", - "subnet": "10.1.2.0/24", - "dataDir": "%s" - } - }`, ver, dataDir) - - const ( - containerWithTbfIFName = "ptp0" - containerWithoutTbfIFName = "ptp1" - ) - - containerWithTbfNS, err = testutils.NewNS() - Expect(err).NotTo(HaveOccurred()) - - containerWithoutTbfNS, err = testutils.NewNS() - Expect(err).NotTo(HaveOccurred()) + Describe(fmt.Sprintf("[%s] QoS effective", ver), func() { + Context(fmt.Sprintf("[%s] when chaining bandwidth plugin with PTP", ver), func() { + var ptpConf string + var rateInBits uint64 + var burstInBits uint64 + var packetInBytes int + var containerWithoutQoSNS ns.NetNS + var containerWithQoSNS ns.NetNS + var portServerWithQoS int + var portServerWithoutQoS int + + var containerWithQoSRes types.Result + var containerWithoutQoSRes types.Result + var echoServerWithQoS *gexec.Session + var echoServerWithoutQoS *gexec.Session + var dataDir string + + BeforeEach(func() { + rateInBytes := 1000 + rateInBits = uint64(rateInBytes * 8) + burstInBits = rateInBits * 2 + + // NOTE: Traffic shapping is not that precise at low rates, would be better to use higher rates + simple time+netcat for data transfer, rather than the provided + // client/server bin (limited to small amount of data) + packetInBytes = rateInBytes * 3 + + var err error + dataDir, err = os.MkdirTemp("", "bandwidth_linux_test") + Expect(err).NotTo(HaveOccurred()) - By("create two containers, and use the bandwidth plugin on one of them") - Expect(hostNs.Do(func(ns.NetNS) error { - defer GinkgoRecover() + ptpConf = fmt.Sprintf(`{ + "cniVersion": "%s", + "name": "myBWnet", + "type": "ptp", + "ipMasq": true, + "mtu": 512, + "ipam": { + "type": "host-local", + "subnet": "10.1.2.0/24", + "dataDir": "%s" + } + }`, ver, dataDir) - containerWithTbfRes, _, err = testutils.CmdAdd(containerWithTbfNS.Path(), "dummy", containerWithTbfIFName, []byte(ptpConf), func() error { - r, err := invoke.DelegateAdd(context.TODO(), "ptp", []byte(ptpConf), nil) - Expect(err).NotTo(HaveOccurred()) - Expect(r.Print()).To(Succeed()) + const ( + containerWithQoSIFName = "ptp0" + containerWithoutQoSIFName = "ptp1" + ) - return err - }) + containerWithQoSNS, err = testutils.NewNS() Expect(err).NotTo(HaveOccurred()) - containerWithoutTbfRes, _, err = testutils.CmdAdd(containerWithoutTbfNS.Path(), "dummy2", containerWithoutTbfIFName, []byte(ptpConf), func() error { - r, err := invoke.DelegateAdd(context.TODO(), "ptp", []byte(ptpConf), nil) - Expect(err).NotTo(HaveOccurred()) - Expect(r.Print()).To(Succeed()) - - return err - }) + containerWithoutQoSNS, err = testutils.NewNS() Expect(err).NotTo(HaveOccurred()) - containerWithTbfResult, err := types100.GetResult(containerWithTbfRes) - Expect(err).NotTo(HaveOccurred()) + By("create two containers, and use the bandwidth plugin on one of them") + Expect(hostNs.Do(func(ns.NetNS) error { + defer GinkgoRecover() - tbfPluginConf := &PluginConf{} - err = json.Unmarshal([]byte(ptpConf), &tbfPluginConf) - Expect(err).NotTo(HaveOccurred()) + containerWithQoSRes, _, err = testutils.CmdAdd(containerWithQoSNS.Path(), "dummy", containerWithQoSIFName, []byte(ptpConf), func() error { + r, err := invoke.DelegateAdd(context.TODO(), "ptp", []byte(ptpConf), nil) + Expect(err).NotTo(HaveOccurred()) + Expect(r.Print()).To(Succeed()) - tbfPluginConf.RuntimeConfig.Bandwidth = &BandwidthEntry{ - IngressBurst: burstInBits, - IngressRate: rateInBits, - EgressBurst: burstInBits, - EgressRate: rateInBits, - } - tbfPluginConf.Type = "bandwidth" - newConfBytes, err := buildOneConfig("myBWnet", ver, tbfPluginConf, containerWithTbfResult) - Expect(err).NotTo(HaveOccurred()) + return err + }) + Expect(err).NotTo(HaveOccurred()) - args := &skel.CmdArgs{ - ContainerID: "dummy3", - Netns: containerWithTbfNS.Path(), - IfName: containerWithTbfIFName, - StdinData: newConfBytes, - } + containerWithoutQoSRes, _, err = testutils.CmdAdd(containerWithoutQoSNS.Path(), "dummy2", containerWithoutQoSIFName, []byte(ptpConf), func() error { + r, err := invoke.DelegateAdd(context.TODO(), "ptp", []byte(ptpConf), nil) + Expect(err).NotTo(HaveOccurred()) + Expect(r.Print()).To(Succeed()) - result, out, err := testutils.CmdAdd(containerWithTbfNS.Path(), args.ContainerID, "", newConfBytes, func() error { return cmdAdd(args) }) - Expect(err).NotTo(HaveOccurred(), string(out)) + return err + }) + Expect(err).NotTo(HaveOccurred()) - if testutils.SpecVersionHasCHECK(ver) { - // Do CNI Check - checkConf := &PluginConf{} - err = json.Unmarshal([]byte(ptpConf), &checkConf) + containerWithQoSResult, err := types100.GetResult(containerWithQoSRes) + Expect(err).NotTo(HaveOccurred()) + + bandwidthPluginConf := &PluginConf{} + err = json.Unmarshal([]byte(ptpConf), &bandwidthPluginConf) Expect(err).NotTo(HaveOccurred()) - checkConf.RuntimeConfig.Bandwidth = &BandwidthEntry{ + bandwidthPluginConf.RuntimeConfig.Bandwidth = &BandwidthEntry{ IngressBurst: burstInBits, IngressRate: rateInBits, EgressBurst: burstInBits, EgressRate: rateInBits, } - checkConf.Type = "bandwidth" - - newCheckBytes, err := buildOneConfig("myBWnet", ver, checkConf, result) + bandwidthPluginConf.Type = "bandwidth" + newConfBytes, err := buildOneConfig("myBWnet", ver, bandwidthPluginConf, containerWithQoSResult) Expect(err).NotTo(HaveOccurred()) - args = &skel.CmdArgs{ + args := &skel.CmdArgs{ ContainerID: "dummy3", - Netns: containerWithTbfNS.Path(), - IfName: containerWithTbfIFName, - StdinData: newCheckBytes, + Netns: containerWithQoSNS.Path(), + IfName: containerWithQoSIFName, + StdinData: newConfBytes, } - err = testutils.CmdCheck(containerWithTbfNS.Path(), args.ContainerID, "", func() error { return cmdCheck(args) }) - Expect(err).NotTo(HaveOccurred()) - } + result, out, err := testutils.CmdAdd(containerWithQoSNS.Path(), args.ContainerID, "", newConfBytes, func() error { return cmdAdd(args) }) + Expect(err).NotTo(HaveOccurred(), string(out)) - return nil - })).To(Succeed()) + if testutils.SpecVersionHasCHECK(ver) { + // Do CNI Check + checkConf := &PluginConf{} + err = json.Unmarshal([]byte(ptpConf), &checkConf) + Expect(err).NotTo(HaveOccurred()) - By("starting a tcp server on both containers") - portServerWithTbf, echoServerWithTbf = startEchoServerInNamespace(containerWithTbfNS) - portServerWithoutTbf, echoServerWithoutTbf = startEchoServerInNamespace(containerWithoutTbfNS) - }) + checkConf.RuntimeConfig.Bandwidth = &BandwidthEntry{ + IngressBurst: burstInBits, + IngressRate: rateInBits, + EgressBurst: burstInBits, + EgressRate: rateInBits, + } + checkConf.Type = "bandwidth" - AfterEach(func() { - Expect(os.RemoveAll(dataDir)).To(Succeed()) + newCheckBytes, err := buildOneConfig("myBWnet", ver, checkConf, result) + Expect(err).NotTo(HaveOccurred()) - Expect(containerWithTbfNS.Close()).To(Succeed()) - Expect(testutils.UnmountNS(containerWithTbfNS)).To(Succeed()) - Expect(containerWithoutTbfNS.Close()).To(Succeed()) - Expect(testutils.UnmountNS(containerWithoutTbfNS)).To(Succeed()) + args = &skel.CmdArgs{ + ContainerID: "dummy3", + Netns: containerWithQoSNS.Path(), + IfName: containerWithQoSIFName, + StdinData: newCheckBytes, + } - if echoServerWithoutTbf != nil { - echoServerWithoutTbf.Kill() - } - if echoServerWithTbf != nil { - echoServerWithTbf.Kill() - } - }) + err = testutils.CmdCheck(containerWithQoSNS.Path(), args.ContainerID, "", func() error { return cmdCheck(args) }) + Expect(err).NotTo(HaveOccurred()) + } - Measure("limits ingress traffic on veth device", func(b Benchmarker) { - var runtimeWithLimit time.Duration - var runtimeWithoutLimit time.Duration + return nil + })).To(Succeed()) - By("gather timing statistics about both containers") - By("sending tcp traffic to the container that has traffic shaped", func() { - runtimeWithLimit = b.Time("with tbf", func() { - result, err := types100.GetResult(containerWithTbfRes) - Expect(err).NotTo(HaveOccurred()) + By("starting a tcp server on both containers") + portServerWithQoS, echoServerWithQoS = startEchoServerInNamespace(containerWithQoSNS) + portServerWithoutQoS, echoServerWithoutQoS = startEchoServerInNamespace(containerWithoutQoSNS) + }) - makeTCPClientInNS(hostNs.Path(), result.IPs[0].Address.IP.String(), portServerWithTbf, packetInBytes) - }) + AfterEach(func() { + Expect(os.RemoveAll(dataDir)).To(Succeed()) + + Expect(containerWithQoSNS.Close()).To(Succeed()) + Expect(testutils.UnmountNS(containerWithQoSNS)).To(Succeed()) + Expect(containerWithoutQoSNS.Close()).To(Succeed()) + Expect(testutils.UnmountNS(containerWithoutQoSNS)).To(Succeed()) + + if echoServerWithoutQoS != nil { + echoServerWithoutQoS.Kill() + } + if echoServerWithQoS != nil { + echoServerWithQoS.Kill() + } }) - By("sending tcp traffic to the container that does not have traffic shaped", func() { - runtimeWithoutLimit = b.Time("without tbf", func() { - result, err := types100.GetResult(containerWithoutTbfRes) + It("limits ingress traffic on veth device", func() { + var runtimeWithLimit time.Duration + var runtimeWithoutLimit time.Duration + + By("gather timing statistics about both containers") + + By("sending tcp traffic to the container that has traffic shaped", func() { + start := time.Now() + result, err := types100.GetResult(containerWithQoSRes) Expect(err).NotTo(HaveOccurred()) + makeTCPClientInNS(hostNs.Path(), result.IPs[0].Address.IP.String(), portServerWithQoS, packetInBytes) + end := time.Now() + runtimeWithLimit = end.Sub(start) + log.Printf("Elapsed with qos %.2f", runtimeWithLimit.Seconds()) + }) - makeTCPClientInNS(hostNs.Path(), result.IPs[0].Address.IP.String(), portServerWithoutTbf, packetInBytes) + By("sending tcp traffic to the container that does not have traffic shaped", func() { + start := time.Now() + result, err := types100.GetResult(containerWithoutQoSRes) + Expect(err).NotTo(HaveOccurred()) + makeTCPClientInNS(hostNs.Path(), result.IPs[0].Address.IP.String(), portServerWithoutQoS, packetInBytes) + end := time.Now() + runtimeWithoutLimit = end.Sub(start) + log.Printf("Elapsed without qos %.2f", runtimeWithoutLimit.Seconds()) }) - }) - Expect(runtimeWithLimit).To(BeNumerically(">", runtimeWithoutLimit+1000*time.Millisecond)) - }, 1) + Expect(runtimeWithLimit).To(BeNumerically(">", runtimeWithoutLimit+1000*time.Millisecond)) + }) + }) }) } diff --git a/plugins/meta/bandwidth/bandwidth_suite_test.go b/plugins/meta/bandwidth/bandwidth_suite_test.go index a6f5edd90..bfb2410e2 100644 --- a/plugins/meta/bandwidth/bandwidth_suite_test.go +++ b/plugins/meta/bandwidth/bandwidth_suite_test.go @@ -33,7 +33,7 @@ import ( "github.com/containernetworking/plugins/pkg/ns" ) -func TestTBF(t *testing.T) { +func TestHTB(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "plugins/meta/bandwidth") } diff --git a/plugins/meta/bandwidth/ifb_creator.go b/plugins/meta/bandwidth/ifb_creator.go index 58d76add9..fddb51de9 100644 --- a/plugins/meta/bandwidth/ifb_creator.go +++ b/plugins/meta/bandwidth/ifb_creator.go @@ -15,6 +15,8 @@ package main import ( + "bytes" + "encoding/binary" "fmt" "net" "syscall" @@ -24,14 +26,23 @@ import ( "github.com/containernetworking/plugins/pkg/ip" ) -const latencyInMillis = 25 +const ( + latencyInMillis = 25 + UncappedRate uint64 = 100_000_000_000 + DefaultClassMinorID = 48 +) + +func CreateIfb(ifbDeviceName string, mtu int, qlen int) error { + if qlen < 1000 { + qlen = 1000 + } -func CreateIfb(ifbDeviceName string, mtu int) error { err := netlink.LinkAdd(&netlink.Ifb{ LinkAttrs: netlink.LinkAttrs{ - Name: ifbDeviceName, - Flags: net.FlagUp, - MTU: mtu, + Name: ifbDeviceName, + Flags: net.FlagUp, + MTU: mtu, + TxQLen: qlen, }, }) if err != nil { @@ -49,15 +60,15 @@ func TeardownIfb(deviceName string) error { return err } -func CreateIngressQdisc(rateInBits, burstInBits uint64, hostDeviceName string) error { +func CreateIngressQdisc(rateInBits, burstInBits uint64, excludeSubnets []string, hostDeviceName string) error { hostDevice, err := netlink.LinkByName(hostDeviceName) if err != nil { return fmt.Errorf("get host device: %s", err) } - return createTBF(rateInBits, burstInBits, hostDevice.Attrs().Index) + return createHTB(rateInBits, burstInBits, hostDevice.Attrs().Index, excludeSubnets) } -func CreateEgressQdisc(rateInBits, burstInBits uint64, hostDeviceName string, ifbDeviceName string) error { +func CreateEgressQdisc(rateInBits, burstInBits uint64, excludeSubnets []string, hostDeviceName string, ifbDeviceName string) error { ifbDevice, err := netlink.LinkByName(ifbDeviceName) if err != nil { return fmt.Errorf("get ifb device: %s", err) @@ -105,43 +116,201 @@ func CreateEgressQdisc(rateInBits, burstInBits uint64, hostDeviceName string, if } // throttle traffic on ifb device - err = createTBF(rateInBits, burstInBits, ifbDevice.Attrs().Index) + err = createHTB(rateInBits, burstInBits, ifbDevice.Attrs().Index, excludeSubnets) if err != nil { - return fmt.Errorf("create ifb qdisc: %s", err) + // egress from the container/netns pov = ingress from the main netns/host pov + return fmt.Errorf("create htb container egress qos rules: %s", err) } return nil } -func createTBF(rateInBits, burstInBits uint64, linkIndex int) error { - // Equivalent to - // tc qdisc add dev link root tbf - // rate netConf.BandwidthLimits.Rate - // burst netConf.BandwidthLimits.Burst - if rateInBits <= 0 { - return fmt.Errorf("invalid rate: %d", rateInBits) +func createHTB(rateInBits, burstInBits uint64, linkIndex int, excludeSubnets []string) error { + // Netlink struct fields are not clear, let's use shell + + // Step 1 qdisc + // cmd := exec.Command("/usr/sbin/tc", "qdisc", "add", "dev", interfaceName, "root", "handle", "1:", "htb", "default", "30") + qdisc := &netlink.Htb{ + QdiscAttrs: netlink.QdiscAttrs{ + LinkIndex: linkIndex, + Handle: netlink.MakeHandle(1, 0), + Parent: netlink.HANDLE_ROOT, + }, + Defcls: DefaultClassMinorID, + // No idea what these are so let's keep the default values from source code... + Version: 3, + Rate2Quantum: 10, } - if burstInBits <= 0 { - return fmt.Errorf("invalid burst: %d", burstInBits) + err := netlink.QdiscAdd(qdisc) + if err != nil { + return fmt.Errorf("error while creating qdisc: %s", err) } + + // Step 2 classes + rateInBytes := rateInBits / 8 burstInBytes := burstInBits / 8 bufferInBytes := buffer(rateInBytes, uint32(burstInBytes)) - latency := latencyInUsec(latencyInMillis) - limitInBytes := limit(rateInBytes, latency, uint32(burstInBytes)) - qdisc := &netlink.Tbf{ - QdiscAttrs: netlink.QdiscAttrs{ + // The capped class for all but excluded subnets + // cmd = exec.Command("/usr/sbin/tc", "class", "add", "dev", interfaceName, "parent", "1:", "classid", "1:30", "htb", "rate", + // fmt.Sprintf("%d", rateInBits), "burst", fmt.Sprintf("%d", burstInBits)) + defClass := &netlink.HtbClass{ + ClassAttrs: netlink.ClassAttrs{ LinkIndex: linkIndex, - Handle: netlink.MakeHandle(1, 0), - Parent: netlink.HANDLE_ROOT, + Handle: netlink.MakeHandle(1, DefaultClassMinorID), + Parent: netlink.MakeHandle(1, 0), }, - Limit: limitInBytes, Rate: rateInBytes, Buffer: bufferInBytes, + // Let's set up the "burst" rate to twice the specified rate + Ceil: 2 * rateInBytes, + Cbuffer: bufferInBytes, } - err := netlink.QdiscAdd(qdisc) + + err = netlink.ClassAdd(defClass) + if err != nil { + return fmt.Errorf("error while creating htb default class: %s", err) + } + + // The uncapped class for the excluded subnets + // cmd = exec.Command("/usr/sbin/tc", "class", "add", "dev", interfaceName, "parent", "1:", "classid", "1:1", "htb", + // "rate", "100000000000") + bigRate := UncappedRate + uncappedClass := &netlink.HtbClass{ + ClassAttrs: netlink.ClassAttrs{ + LinkIndex: linkIndex, + Handle: netlink.MakeHandle(1, 1), + Parent: qdisc.Handle, + }, + Rate: bigRate, + Ceil: bigRate, + // No need for any burst, the minimum buffer size in q_htb.c should be enough to handle the rate which + // is already more than enough + } + err = netlink.ClassAdd(uncappedClass) if err != nil { - return fmt.Errorf("create qdisc: %s", err) + return fmt.Errorf("error while creating htb uncapped class: %s", err) + } + + // Now add filters to redirect excluded subnets to the class 1 instead of the default one (30) + + for _, subnet := range excludeSubnets { + + // cmd = exec.Command("/usr/sbin/tc", "filter", "add", "dev", interfaceName, "parent", "1:", "protocol", protocol, + // "prio", "16", "u32", "match", "ip", "dst", subnet, "flowid", "1:1") + + _, nw, err := net.ParseCIDR(subnet) + if err != nil { + return fmt.Errorf("bad subnet %s: %s", subnet, err) + } + var maskBytes []byte = nw.Mask + var subnetBytes []byte = nw.IP + + if len(maskBytes) != len(subnetBytes) { + return fmt.Errorf("error using net lib for subnet %s len(maskBytes) != len(subnetBytes) "+ + "(%d != %d) should not happen", subnet, len(maskBytes), len(subnetBytes)) + } + + isIpv4 := nw.IP.To4() != nil + protocol := syscall.ETH_P_IPV6 + var prio uint16 = 15 + var offset int32 = 24 + keepBytes := 16 + if isIpv4 { + protocol = syscall.ETH_P_IP + offset = 16 + keepBytes = 4 + // prio/pref needs to be changed if we change the protocol, looks like we cannot mix protocols with the same pref + prio = 16 + + } + + // protocol := syscall.ETH_P_ALL + + if len(maskBytes) < keepBytes { + return fmt.Errorf("error with net lib, unexpected count of bytes for ipv4 mask (%d < %d)", + len(maskBytes), keepBytes) + } + if len(subnetBytes) < keepBytes { + return fmt.Errorf("error with net lib, unexpected count of bytes for ipv4 subnet (%d < %d)", + len(subnetBytes), keepBytes) + } + maskBytes = maskBytes[len(maskBytes)-keepBytes:] + subnetBytes = subnetBytes[len(subnetBytes)-keepBytes:] + + // For ipv4 we should have at most 1 key, for ipv6 at most 4 + keys := make([]netlink.TcU32Key, 0, 4) + + for i := 0; i < len(maskBytes); i += 4 { + var mask, subnetI uint32 + buf := bytes.NewReader(maskBytes[i : i+4]) + err = binary.Read(buf, binary.BigEndian, &mask) + if err != nil { + return fmt.Errorf("error, htb filter, unable to build mask match filter, iter %d for subnet %s", + i, subnet) + } + + if mask != 0 { + // If mask == 0, any value on this section will be a match and we do not need a filter for this + buf = bytes.NewReader(subnetBytes[i : i+4]) + err = binary.Read(buf, binary.BigEndian, &subnetI) + if err != nil { + return fmt.Errorf("error, htb filter, unable to build subnet match filter, iter %d for subnet %s", + i, subnet) + } + keys = append(keys, netlink.TcU32Key{ + Mask: mask, + Val: subnetI, + Off: offset, + OffMask: 0, + }) + } + + offset += 4 + } + + if len(keys) != cap(keys) { + shrinkedKeys := make([]netlink.TcU32Key, len(keys)) + copied := copy(shrinkedKeys, keys) + if copied != len(keys) { + return fmt.Errorf("copy tc u32 keys error, for subnet %s copied %d != keys %d", subnet, copied, len(keys)) + } + keys = shrinkedKeys + } + + if isIpv4 && len(keys) > 1 { + return fmt.Errorf("error, htb ipv4 filter, unexpected rule length (%d > 1), for subnet %s", + len(keys), subnet) + } else if len(keys) > 4 { + return fmt.Errorf("error, htb ipv6 filter, unexpected rule length (%d > 4), for subnet %s", + len(keys), subnet) + } + + // If len(keys) == 0, it means that we want to wildcard all traffic on the non default/uncapped class + var selector *netlink.TcU32Sel + if len(keys) > 0 { + selector = &netlink.TcU32Sel{ + Nkeys: uint8(len(keys)), + Flags: netlink.TC_U32_TERMINAL, + Keys: keys, + } + } + + tcFilter := netlink.U32{ + FilterAttrs: netlink.FilterAttrs{ + LinkIndex: linkIndex, + Parent: qdisc.Handle, + Priority: prio, + Protocol: uint16(protocol), + }, + ClassId: uncappedClass.Handle, + Sel: selector, + } + + err = netlink.FilterAdd(&tcFilter) + if err != nil { + return fmt.Errorf("error, unable to create htb filter, details %s", err) + } } return nil } @@ -153,11 +322,3 @@ func time2Tick(time uint32) uint32 { func buffer(rate uint64, burst uint32) uint32 { return time2Tick(uint32(float64(burst) * float64(netlink.TIME_UNITS_PER_SEC) / float64(rate))) } - -func limit(rate uint64, latency float64, buffer uint32) uint32 { - return uint32(float64(rate)*latency/float64(netlink.TIME_UNITS_PER_SEC)) + buffer -} - -func latencyInUsec(latencyInMillis float64) float64 { - return float64(netlink.TIME_UNITS_PER_SEC) * (latencyInMillis / 1000.0) -} diff --git a/plugins/meta/bandwidth/main.go b/plugins/meta/bandwidth/main.go index 66eae1521..5b6f54ea8 100644 --- a/plugins/meta/bandwidth/main.go +++ b/plugins/meta/bandwidth/main.go @@ -18,6 +18,7 @@ import ( "encoding/json" "fmt" "math" + "net" "github.com/vishvananda/netlink" @@ -39,8 +40,9 @@ const ( // BandwidthEntry corresponds to a single entry in the bandwidth argument, // see CONVENTIONS.md type BandwidthEntry struct { - IngressRate uint64 `json:"ingressRate"` // Bandwidth rate in bps for traffic through container. 0 for no limit. If ingressRate is set, ingressBurst must also be set - IngressBurst uint64 `json:"ingressBurst"` // Bandwidth burst in bits for traffic through container. 0 for no limit. If ingressBurst is set, ingressRate must also be set + NonShapedSubnets []string `json:"nonShapedSubnets"` // Ipv4/ipv6 subnets to be excluded from traffic shaping + IngressRate uint64 `json:"ingressRate"` // Bandwidth rate in bps for traffic through container. 0 for no limit. If ingressRate is set, ingressBurst must also be set + IngressBurst uint64 `json:"ingressBurst"` // Bandwidth burst in bits for traffic through container. 0 for no limit. If ingressBurst is set, ingressRate must also be set EgressRate uint64 `json:"egressRate"` // Bandwidth rate in bps for traffic through container. 0 for no limit. If egressRate is set, egressBurst must also be set EgressBurst uint64 `json:"egressBurst"` // Bandwidth burst in bits for traffic through container. 0 for no limit. If egressBurst is set, egressRate must also be set @@ -96,10 +98,16 @@ func parseConfig(stdin []byte) (*PluginConf, error) { } func getBandwidth(conf *PluginConf) *BandwidthEntry { - if conf.BandwidthEntry == nil && conf.RuntimeConfig.Bandwidth != nil { - return conf.RuntimeConfig.Bandwidth + bw := conf.BandwidthEntry + if bw == nil && conf.RuntimeConfig.Bandwidth != nil { + bw = conf.RuntimeConfig.Bandwidth } - return conf.BandwidthEntry + + if bw != nil && bw.NonShapedSubnets == nil { + bw.NonShapedSubnets = make([]string, 0) + } + + return bw } func validateRateAndBurst(rate, burst uint64) error { @@ -119,13 +127,13 @@ func getIfbDeviceName(networkName string, containerID string) string { return utils.MustFormatHashWithPrefix(maxIfbDeviceLength, ifbDevicePrefix, networkName+containerID) } -func getMTU(deviceName string) (int, error) { +func getMTUAndQLen(deviceName string) (int, int, error) { link, err := netlink.LinkByName(deviceName) if err != nil { - return -1, err + return -1, -1, err } - return link.Attrs().MTU, nil + return link.Attrs().MTU, link.Attrs().TxQLen, nil } // get the veth peer of container interface in host namespace @@ -159,6 +167,16 @@ func getHostInterface(interfaces []*current.Interface, containerIfName string, n return nil, fmt.Errorf("no veth peer of container interface found in host ns") } +func validateSubnets(subnets []string) error { + for _, subnet := range subnets { + _, _, err := net.ParseCIDR(subnet) + if err != nil { + return fmt.Errorf("bad subnet %q provided, details %s", subnet, err) + } + } + return nil +} + func cmdAdd(args *skel.CmdArgs) error { conf, err := parseConfig(args.StdinData) if err != nil { @@ -170,6 +188,10 @@ func cmdAdd(args *skel.CmdArgs) error { return types.PrintResult(conf.PrevResult, conf.CNIVersion) } + if err = validateSubnets(bandwidth.NonShapedSubnets); err != nil { + return err + } + if conf.PrevResult == nil { return fmt.Errorf("must be called as chained plugin") } @@ -191,21 +213,22 @@ func cmdAdd(args *skel.CmdArgs) error { } if bandwidth.IngressRate > 0 && bandwidth.IngressBurst > 0 { - err = CreateIngressQdisc(bandwidth.IngressRate, bandwidth.IngressBurst, hostInterface.Name) + err = CreateIngressQdisc(bandwidth.IngressRate, bandwidth.IngressBurst, + bandwidth.NonShapedSubnets, hostInterface.Name) if err != nil { return err } } if bandwidth.EgressRate > 0 && bandwidth.EgressBurst > 0 { - mtu, err := getMTU(hostInterface.Name) + mtu, qlen, err := getMTUAndQLen(hostInterface.Name) if err != nil { return err } ifbDeviceName := getIfbDeviceName(conf.Name, args.ContainerID) - err = CreateIfb(ifbDeviceName, mtu) + err = CreateIfb(ifbDeviceName, mtu, qlen) if err != nil { return err } @@ -219,7 +242,8 @@ func cmdAdd(args *skel.CmdArgs) error { Name: ifbDeviceName, Mac: ifbDevice.Attrs().HardwareAddr.String(), }) - err = CreateEgressQdisc(bandwidth.EgressRate, bandwidth.EgressBurst, hostInterface.Name, ifbDeviceName) + err = CreateEgressQdisc(bandwidth.EgressRate, bandwidth.EgressBurst, + bandwidth.NonShapedSubnets, hostInterface.Name, ifbDeviceName) if err != nil { return err } @@ -292,75 +316,98 @@ func cmdCheck(args *skel.CmdArgs) error { bandwidth := getBandwidth(bwConf) + err = validateSubnets(bandwidth.NonShapedSubnets) + if err != nil { + return fmt.Errorf("failed to check subnets, details %s", err) + } + if bandwidth.IngressRate > 0 && bandwidth.IngressBurst > 0 { rateInBytes := bandwidth.IngressRate / 8 burstInBytes := bandwidth.IngressBurst / 8 bufferInBytes := buffer(rateInBytes, uint32(burstInBytes)) - latency := latencyInUsec(latencyInMillis) - limitInBytes := limit(rateInBytes, latency, uint32(burstInBytes)) - - qdiscs, err := SafeQdiscList(link) + err = checkHTB(link, rateInBytes, bufferInBytes) if err != nil { return err } - if len(qdiscs) == 0 { - return fmt.Errorf("Failed to find qdisc") - } - - for _, qdisc := range qdiscs { - tbf, isTbf := qdisc.(*netlink.Tbf) - if !isTbf { - break - } - if tbf.Rate != rateInBytes { - return fmt.Errorf("Rate doesn't match") - } - if tbf.Limit != limitInBytes { - return fmt.Errorf("Limit doesn't match") - } - if tbf.Buffer != bufferInBytes { - return fmt.Errorf("Buffer doesn't match") - } - } } - if bandwidth.EgressRate > 0 && bandwidth.EgressBurst > 0 { rateInBytes := bandwidth.EgressRate / 8 burstInBytes := bandwidth.EgressBurst / 8 bufferInBytes := buffer(rateInBytes, uint32(burstInBytes)) - latency := latencyInUsec(latencyInMillis) - limitInBytes := limit(rateInBytes, latency, uint32(burstInBytes)) - ifbDeviceName := getIfbDeviceName(bwConf.Name, args.ContainerID) - ifbDevice, err := netlink.LinkByName(ifbDeviceName) if err != nil { return fmt.Errorf("get ifb device: %s", err) } - - qdiscs, err := SafeQdiscList(ifbDevice) + err = checkHTB(ifbDevice, rateInBytes, bufferInBytes) if err != nil { return err } - if len(qdiscs) == 0 { - return fmt.Errorf("Failed to find qdisc") + } + return nil +} + +func checkHTB(link netlink.Link, rateInBytes uint64, bufferInBytes uint32) error { + qdiscs, err := SafeQdiscList(link) + if err != nil { + return err + } + if len(qdiscs) == 0 { + return fmt.Errorf("Failed to find qdisc") + } + foundHTB := false + for _, qdisc := range qdiscs { + htb, isHtb := qdisc.(*netlink.Htb) + if !isHtb { + continue } - for _, qdisc := range qdiscs { - tbf, isTbf := qdisc.(*netlink.Tbf) - if !isTbf { - break - } - if tbf.Rate != rateInBytes { - return fmt.Errorf("Rate doesn't match") - } - if tbf.Limit != limitInBytes { - return fmt.Errorf("Limit doesn't match") + if foundHTB { + return fmt.Errorf("Several htb qdisc found for device %s", link.Attrs().Name) + } + + foundHTB = true + if htb.Defcls != DefaultClassMinorID { + return fmt.Errorf("Default class does not match") + } + + classes, err := netlink.ClassList(link, htb.Handle) + if err != nil { + return fmt.Errorf("Unable to list classes bound to htb qdisc for device %s. Details %s", + link.Attrs().Name, err) + } + if len(classes) != 2 { + return fmt.Errorf("Number of htb classes does not match for device %s (%d != 2)", + link.Attrs().Name, len(classes)) + } + + for _, c := range classes { + htbClass, isHtb := c.(*netlink.HtbClass) + if !isHtb { + return fmt.Errorf("Unexpected class for parent htb qdisc bound to device %s", link.Attrs().Name) } - if tbf.Buffer != bufferInBytes { - return fmt.Errorf("Buffer doesn't match") + if htbClass.Handle == htb.Defcls { + if htbClass.Rate != rateInBytes { + return fmt.Errorf("Rate does not match for the default class for device %s (%d != %d)", + link.Attrs().Name, htbClass.Rate, rateInBytes) + } + + if htbClass.Buffer != bufferInBytes { + return fmt.Errorf("Burst buffer size does not match for the default class for device %s (%d != %d)", + link.Attrs().Name, htbClass.Buffer, bufferInBytes) + } + } else if htbClass.Handle == netlink.MakeHandle(1, 1) { + if htbClass.Rate != UncappedRate { + return fmt.Errorf("Rate does not match for the uncapped class for device %s (%d != %d)", + link.Attrs().Name, htbClass.Rate, UncappedRate) + } } } + + // TODO: check non shaped subnet filters + // if bandwidth.NonShapedSubnets { + // filters, err := netlink.FilterList(link, htb.Handle) + // } } return nil