From 5df82dcbcb10205c419f2a9829c9a50e1e566352 Mon Sep 17 00:00:00 2001 From: Tom Denham Date: Fri, 10 Nov 2017 16:42:09 -0800 Subject: [PATCH] network/iptables: Add iptables rules to FORWARD chain To work around the Docker change from v1.13 which changed the default FORWARD policy to DROP. The change has bitten many many users. The troubleshooting documentation is also updated talk about the issue. Replaces PR #862 Fixes #834 Fixes #823 Fixes #609 Fixes #799 --- Documentation/troubleshooting.md | 8 ++- main.go | 7 +- network/{ipmasq.go => iptables.go} | 75 ++++++++++++-------- network/{ipmasq_test.go => iptables_test.go} | 35 ++++----- 4 files changed, 73 insertions(+), 52 deletions(-) rename network/{ipmasq.go => iptables.go} (53%) rename network/{ipmasq_test.go => iptables_test.go} (70%) diff --git a/Documentation/troubleshooting.md b/Documentation/troubleshooting.md index 046bfacd9..1603a5ff6 100644 --- a/Documentation/troubleshooting.md +++ b/Documentation/troubleshooting.md @@ -2,6 +2,12 @@ # General +## Connectivity +In Docker v1.13 and later, the default iptables forwarding policy was changed to `DROP`. For more detail on the Docker change, see the Docker [documentation](https://docs.docker.com/engine/userguide/networking/default_network/container-communication/#container-communication-between-hosts). + +This problems manifests itself as connectivity problems between containers running on different hosts. To resolve it upgrade to the lateset version of flannel. + + ## Logging Flannel uses the `glog` library but only supports logging to stderr. The severity level can't be changed but the verbosity can be changed with the `-v` option. Flannel does not make extensive use of the verbosity level but increasing th value from `0` (the default) will result in some additional logs. To get the most detailed logs, use `-v=10` @@ -49,7 +55,7 @@ Flannel is known to scale to a very large number of hosts. A delay in contacting Flannel relies on the underlying network so that's the first thing to check if you're seeing poor data plane performance. There are two flannel specific choices that can have a big impact on performance -1) T1) The type of backend. For example, if encapsulation is used, `vxlan` will always perform better than `udp`. For maximum data plane performance, avoid encapsulation. +1) The type of backend. For example, if encapsulation is used, `vxlan` will always perform better than `udp`. For maximum data plane performance, avoid encapsulation. 2) The size of the MTU can have a large impact. To achieve maximum raw bandwidth, a network supporting a large MTU should be used. Flannel writes an MTU setting to the `subnet.env` file. This file is read by either the Docker daemon or the CNI flannel plugin which does the networking for individual containers. To troubleshoot, first ensure that the network interface that flannel is using has the right MTU. Then check that the correct MTU is written to the `subnet.env`. Finally, check that the containers have the correct MTU on their virtual ethernet device. diff --git a/main.go b/main.go index 24cccf1ca..715d5669f 100644 --- a/main.go +++ b/main.go @@ -284,9 +284,14 @@ func main() { // Set up ipMasq if needed if opts.ipMasq { - go network.SetupAndEnsureIPMasq(config.Network, bn.Lease()) + go network.SetupAndEnsureIPTables(network.MasqRules(config.Network, bn.Lease())) } + // Always enables forwarding rules. This is needed for Docker versions >1.13 (https://docs.docker.com/engine/userguide/networking/default_network/container-communication/#container-communication-between-hosts) + // In Docker 1.12 and earlier, the default FORWARD chain policy was ACCEPT. + // In Docker 1.13 and later, Docker sets the default policy of the FORWARD chain to DROP. + go network.SetupAndEnsureIPTables(network.ForwardRules(config.Network.String())) + if err := WriteSubnetFile(opts.subnetFile, config.Network, opts.ipMasq, bn); err != nil { // Continue, even though it failed. log.Warningf("Failed to write subnet file: %s", err) diff --git a/network/ipmasq.go b/network/iptables.go similarity index 53% rename from network/ipmasq.go rename to network/iptables.go index 48644ed67..13d9304b9 100644 --- a/network/ipmasq.go +++ b/network/iptables.go @@ -20,37 +20,52 @@ import ( log "github.com/golang/glog" + "time" + "github.com/coreos/flannel/pkg/ip" "github.com/coreos/flannel/subnet" "github.com/coreos/go-iptables/iptables" - "time" ) -type IPTablesRules interface { +type IPTables interface { AppendUnique(table string, chain string, rulespec ...string) error Delete(table string, chain string, rulespec ...string) error Exists(table string, chain string, rulespec ...string) (bool, error) } -func rules(ipn ip.IP4Net, lease *subnet.Lease) [][]string { +type IPTablesRule struct { + table string + chain string + rulespec []string +} + +func MasqRules(ipn ip.IP4Net, lease *subnet.Lease) []IPTablesRule { n := ipn.String() sn := lease.Subnet.String() - return [][]string{ + return []IPTablesRule{ // This rule makes sure we don't NAT traffic within overlay network (e.g. coming out of docker0) - {"-s", n, "-d", n, "-j", "RETURN"}, + {"nat", "POSTROUTING", []string{"-s", n, "-d", n, "-j", "RETURN"}}, // NAT if it's not multicast traffic - {"-s", n, "!", "-d", "224.0.0.0/4", "-j", "MASQUERADE"}, + {"nat", "POSTROUTING", []string{"-s", n, "!", "-d", "224.0.0.0/4", "-j", "MASQUERADE"}}, // Prevent performing Masquerade on external traffic which arrives from a Node that owns the container/pod IP address - {"!", "-s", n, "-d", sn, "-j", "RETURN"}, + {"nat", "POSTROUTING", []string{"!", "-s", n, "-d", sn, "-j", "RETURN"}}, // Masquerade anything headed towards flannel from the host - {"!", "-s", n, "-d", n, "-j", "MASQUERADE"}, + {"nat", "POSTROUTING", []string{"!", "-s", n, "-d", n, "-j", "MASQUERADE"}}, + } +} + +func ForwardRules(flannelNetwork string) []IPTablesRule { + return []IPTablesRule{ + // These rules allow traffic to be forwarded if it is to or from the flannel network range. + {"filter", "FORWARD", []string{"-s", flannelNetwork, "-j", "ACCEPT"}}, + {"filter", "FORWARD", []string{"-d", flannelNetwork, "-j", "ACCEPT"}}, } } -func ipMasqRulesExist(ipt IPTablesRules, ipn ip.IP4Net, lease *subnet.Lease) (bool, error) { - for _, rule := range rules(ipn, lease) { - exists, err := ipt.Exists("nat", "POSTROUTING", rule...) +func ipTablesRulesExist(ipt IPTables, rules []IPTablesRule) (bool, error) { + for _, rule := range rules { + exists, err := ipt.Exists(rule.table, rule.chain, rule.rulespec...) if err != nil { // this shouldn't ever happen return false, fmt.Errorf("failed to check rule existence: %v", err) @@ -63,30 +78,30 @@ func ipMasqRulesExist(ipt IPTablesRules, ipn ip.IP4Net, lease *subnet.Lease) (bo return true, nil } -func SetupAndEnsureIPMasq(network ip.IP4Net, lease *subnet.Lease) { +func SetupAndEnsureIPTables(rules []IPTablesRule) { ipt, err := iptables.New() if err != nil { // if we can't find iptables, give up and return - log.Errorf("Failed to setup IP Masquerade. IPTables was not found: %v", err) + log.Errorf("Failed to setup IPTables. iptables binary was not found: %v", err) return } defer func() { - teardownIPMasq(ipt, network, lease) + teardownIPTables(ipt, rules) }() for { - // Ensure that all the rules exist every 5 seconds - if err := ensureIPMasq(ipt, network, lease); err != nil { - log.Errorf("Failed to ensure IP Masquerade: %v", err) + // Ensure that all the iptables rules exist every 5 seconds + if err := ensureIPTables(ipt, rules); err != nil { + log.Errorf("Failed to ensure iptables rules: %v", err) } time.Sleep(5 * time.Second) } } -func ensureIPMasq(ipt IPTablesRules, ipn ip.IP4Net, lease *subnet.Lease) error { - exists, err := ipMasqRulesExist(ipt, ipn, lease) +func ensureIPTables(ipt IPTables, rules []IPTablesRule) error { + exists, err := ipTablesRulesExist(ipt, rules) if err != nil { return fmt.Errorf("Error checking rule existence: %v", err) } @@ -97,30 +112,30 @@ func ensureIPMasq(ipt IPTablesRules, ipn ip.IP4Net, lease *subnet.Lease) error { // Otherwise, teardown all the rules and set them up again // We do this because the order of the rules is important log.Info("Some iptables rules are missing; deleting and recreating rules") - teardownIPMasq(ipt, ipn, lease) - if err = setupIPMasq(ipt, ipn, lease); err != nil { + teardownIPTables(ipt, rules) + if err = setupIPTables(ipt, rules); err != nil { return fmt.Errorf("Error setting up rules: %v", err) } return nil } -func setupIPMasq(ipt IPTablesRules, ipn ip.IP4Net, lease *subnet.Lease) error { - for _, rule := range rules(ipn, lease) { - log.Info("Adding iptables rule: ", strings.Join(rule, " ")) - err := ipt.AppendUnique("nat", "POSTROUTING", rule...) +func setupIPTables(ipt IPTables, rules []IPTablesRule) error { + for _, rule := range rules { + log.Info("Adding iptables rule: ", strings.Join(rule.rulespec, " ")) + err := ipt.AppendUnique(rule.table, rule.chain, rule.rulespec...) if err != nil { - return fmt.Errorf("failed to insert IP masquerade rule: %v", err) + return fmt.Errorf("failed to insert IPTables rule: %v", err) } } return nil } -func teardownIPMasq(ipt IPTablesRules, ipn ip.IP4Net, lease *subnet.Lease) { - for _, rule := range rules(ipn, lease) { - log.Info("Deleting iptables rule: ", strings.Join(rule, " ")) +func teardownIPTables(ipt IPTables, rules []IPTablesRule) { + for _, rule := range rules { + log.Info("Deleting iptables rule: ", strings.Join(rule.rulespec, " ")) // We ignore errors here because if there's an error it's almost certainly because the rule // doesn't exist, which is fine (we don't need to delete rules that don't exist) - ipt.Delete("nat", "POSTROUTING", rule...) + ipt.Delete(rule.table, rule.chain, rule.rulespec...) } } diff --git a/network/ipmasq_test.go b/network/iptables_test.go similarity index 70% rename from network/ipmasq_test.go rename to network/iptables_test.go index 544857617..1b67be150 100644 --- a/network/ipmasq_test.go +++ b/network/iptables_test.go @@ -15,11 +15,12 @@ package network import ( - "github.com/coreos/flannel/pkg/ip" - "github.com/coreos/flannel/subnet" "net" "reflect" "testing" + + "github.com/coreos/flannel/pkg/ip" + "github.com/coreos/flannel/subnet" ) func lease() *subnet.Lease { @@ -29,14 +30,8 @@ func lease() *subnet.Lease { } } -type MockIPTablesRule struct { - table string - chain string - rulespec []string -} - type MockIPTables struct { - rules []MockIPTablesRule + rules []IPTablesRule } func (mock *MockIPTables) ruleIndex(table string, chain string, rulespec []string) int { @@ -67,33 +62,33 @@ func (mock *MockIPTables) Exists(table string, chain string, rulespec ...string) func (mock *MockIPTables) AppendUnique(table string, chain string, rulespec ...string) error { var ruleIndex = mock.ruleIndex(table, chain, rulespec) if ruleIndex == -1 { - mock.rules = append(mock.rules, MockIPTablesRule{table: table, chain: chain, rulespec: rulespec}) + mock.rules = append(mock.rules, IPTablesRule{table: table, chain: chain, rulespec: rulespec}) } return nil } func TestDeleteRules(t *testing.T) { ipt := &MockIPTables{} - setupIPMasq(ipt, ip.IP4Net{}, lease()) + setupIPTables(ipt, MasqRules(ip.IP4Net{}, lease())) if len(ipt.rules) != 4 { - t.Errorf("Should be 4 rules, there are actually %d: %#v", len(ipt.rules), ipt.rules) + t.Errorf("Should be 4 masqRules, there are actually %d: %#v", len(ipt.rules), ipt.rules) } - teardownIPMasq(ipt, ip.IP4Net{}, lease()) + teardownIPTables(ipt, MasqRules(ip.IP4Net{}, lease())) if len(ipt.rules) != 0 { - t.Errorf("Should be 0 rules, there are actually %d: %#v", len(ipt.rules), ipt.rules) + t.Errorf("Should be 0 masqRules, there are actually %d: %#v", len(ipt.rules), ipt.rules) } } func TestEnsureRules(t *testing.T) { - // If any rules are missing, they should be all deleted and recreated in the correct order + // If any masqRules are missing, they should be all deleted and recreated in the correct order ipt_correct := &MockIPTables{} - setupIPMasq(ipt_correct, ip.IP4Net{}, lease()) - // setup a mock instance where we delete some rules and run `ensureIPMasq` + setupIPTables(ipt_correct, MasqRules(ip.IP4Net{}, lease())) + // setup a mock instance where we delete some masqRules and run `ensureIPTables` ipt_recreate := &MockIPTables{} - setupIPMasq(ipt_recreate, ip.IP4Net{}, lease()) + setupIPTables(ipt_recreate, MasqRules(ip.IP4Net{}, lease())) ipt_recreate.rules = ipt_recreate.rules[0:2] - ensureIPMasq(ipt_recreate, ip.IP4Net{}, lease()) + ensureIPTables(ipt_recreate, MasqRules(ip.IP4Net{}, lease())) if !reflect.DeepEqual(ipt_recreate.rules, ipt_correct.rules) { - t.Errorf("iptables rules after ensureIPMasq are incorrected. Expected: %#v, Actual: %#v", ipt_recreate.rules, ipt_correct.rules) + t.Errorf("iptables masqRules after ensureIPTables are incorrected. Expected: %#v, Actual: %#v", ipt_recreate.rules, ipt_correct.rules) } }