Skip to content

Commit

Permalink
network/iptables: Add iptables rules to FORWARD chain
Browse files Browse the repository at this point in the history
To work around the Docker change from v1.13 which
changed the default FORWARD policy to DROP.

The change has bitten many many users.

The troubleshooting documentation is also updated talk about the issue.

Replaces PR flannel-io#862
Fixes flannel-io#834
Fixes flannel-io#823
Fixes flannel-io#609
Fixes flannel-io#799
  • Loading branch information
tomdee committed Nov 16, 2017
1 parent 317b7d1 commit 30fdb55
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 145 deletions.
8 changes: 7 additions & 1 deletion Documentation/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

# General

## Connectivity
In Docker v1.13 and later, the default iptables forwarding policy was changed to `DROP`. For more detail on the Docker change, see the Docker [documentation](https://docs.docker.com/engine/userguide/networking/default_network/container-communication/#container-communication-between-hosts).

This problems manifests itself as connectivity problems between containers running on different hosts. To resolve it upgrade to the lateset version of flannel.


## Logging
Flannel uses the `glog` library but only supports logging to stderr. The severity level can't be changed but the verbosity can be changed with the `-v` option. Flannel does not make extensive use of the verbosity level but increasing th value from `0` (the default) will result in some additional logs. To get the most detailed logs, use `-v=10`

Expand Down Expand Up @@ -49,7 +55,7 @@ Flannel is known to scale to a very large number of hosts. A delay in contacting
Flannel relies on the underlying network so that's the first thing to check if you're seeing poor data plane performance.

There are two flannel specific choices that can have a big impact on performance
1) T1) The type of backend. For example, if encapsulation is used, `vxlan` will always perform better than `udp`. For maximum data plane performance, avoid encapsulation.
1) The type of backend. For example, if encapsulation is used, `vxlan` will always perform better than `udp`. For maximum data plane performance, avoid encapsulation.
2) The size of the MTU can have a large impact. To achieve maximum raw bandwidth, a network supporting a large MTU should be used. Flannel writes an MTU setting to the `subnet.env` file. This file is read by either the Docker daemon or the CNI flannel plugin which does the networking for individual containers. To troubleshoot, first ensure that the network interface that flannel is using has the right MTU. Then check that the correct MTU is written to the `subnet.env`. Finally, check that the containers have the correct MTU on their virtual ethernet device.


Expand Down
28 changes: 6 additions & 22 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"strings"
"syscall"

"github.com/coreos/go-iptables/iptables"
"github.com/coreos/pkg/flagutil"
log "github.com/golang/glog"
"golang.org/x/net/context"
Expand Down Expand Up @@ -285,9 +284,14 @@ func main() {

// Set up ipMasq if needed
if opts.ipMasq {
go setupIPMasq(config, bn)
go network.SetupAndEnsureIPTables(network.MasqRules(config.Network, bn.Lease()))
}

// Always enables forwarding rules. This is needed for Docker versions >1.13 (https://docs.docker.com/engine/userguide/networking/default_network/container-communication/#container-communication-between-hosts)
// In Docker 1.12 and earlier, the default FORWARD chain policy was ACCEPT.
// In Docker 1.13 and later, Docker sets the default policy of the FORWARD chain to DROP.
go network.SetupAndEnsureIPTables(network.ForwardRules(config.Network.String()))

if err := WriteSubnetFile(opts.subnetFile, config.Network, opts.ipMasq, bn); err != nil {
// Continue, even though it failed.
log.Warningf("Failed to write subnet file: %s", err)
Expand Down Expand Up @@ -553,26 +557,6 @@ func mustRunHealthz() {
}
}

func setupIPMasq(config *subnet.Config, bn backend.Network) {
ipt, err := iptables.New()
if err != nil {
// if we can't find iptables, give up and return
log.Errorf("Failed to set up IP Masquerade. iptables was not found: %v", err)
return
}
defer func() {
network.TeardownIPMasq(ipt, config.Network, bn.Lease())
}()
for {
// Ensure that all the rules exist every 5 seconds
if err := network.EnsureIPMasq(ipt, config.Network, bn.Lease()); err != nil {
log.Errorf("Failed to ensure IP Masquerade: %v", err)
}
time.Sleep(5 * time.Second)
}

}

func ReadSubnetFromSubnetFile(path string) ip.IP4Net {
var prevSubnet ip.IP4Net
if _, err := os.Stat(path); !os.IsNotExist(err) {
Expand Down
102 changes: 0 additions & 102 deletions network/ipmasq.go

This file was deleted.

141 changes: 141 additions & 0 deletions network/iptables.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
// Copyright 2015 flannel authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package network

import (
"fmt"
"strings"

log "github.com/golang/glog"

"time"

"github.com/coreos/flannel/pkg/ip"
"github.com/coreos/flannel/subnet"
"github.com/coreos/go-iptables/iptables"
)

type IPTables interface {
AppendUnique(table string, chain string, rulespec ...string) error
Delete(table string, chain string, rulespec ...string) error
Exists(table string, chain string, rulespec ...string) (bool, error)
}

type IPTablesRule struct {
table string
chain string
rulespec []string
}

func MasqRules(ipn ip.IP4Net, lease *subnet.Lease) []IPTablesRule {
n := ipn.String()
sn := lease.Subnet.String()

return []IPTablesRule{
// This rule makes sure we don't NAT traffic within overlay network (e.g. coming out of docker0)
{"nat", "POSTROUTING", []string{"-s", n, "-d", n, "-j", "RETURN"}},
// NAT if it's not multicast traffic
{"nat", "POSTROUTING", []string{"-s", n, "!", "-d", "224.0.0.0/4", "-j", "MASQUERADE"}},
// Prevent performing Masquerade on external traffic which arrives from a Node that owns the container/pod IP address
{"nat", "POSTROUTING", []string{"!", "-s", n, "-d", sn, "-j", "RETURN"}},
// Masquerade anything headed towards flannel from the host
{"nat", "POSTROUTING", []string{"!", "-s", n, "-d", n, "-j", "MASQUERADE"}},
}
}

func ForwardRules(flannelNetwork string) []IPTablesRule {
return []IPTablesRule{
// These rules allow traffic to be forwarded if it is to or from the flannel network range.
{"filter", "FORWARD", []string{"-s", flannelNetwork, "-j", "ACCEPT"}},
{"filter", "FORWARD", []string{"-d", flannelNetwork, "-j", "ACCEPT"}},
}
}

func ipTablesRulesExist(ipt IPTables, rules []IPTablesRule) (bool, error) {
for _, rule := range rules {
exists, err := ipt.Exists(rule.table, rule.chain, rule.rulespec...)
if err != nil {
// this shouldn't ever happen
return false, fmt.Errorf("failed to check rule existence", err)
}
if !exists {
return false, nil
}
}

return true, nil
}

func SetupAndEnsureIPTables(rules []IPTablesRule) {
ipt, err := iptables.New()
if err != nil {
// if we can't find iptables, give up and return
log.Errorf("Failed to setup IPTables. iptables binary was not found: %v", err)
return
}

defer func() {
teardownIPTables(ipt, rules)
}()

for {
// Ensure that all the iptables rules exist every 5 seconds
if err := ensureIPTables(ipt, rules); err != nil {
log.Errorf("Failed to ensure iptables rules: %v", err)
}

time.Sleep(5 * time.Second)
}
}

func ensureIPTables(ipt IPTables, rules []IPTablesRule) error {
exists, err := ipTablesRulesExist(ipt, rules)
if err != nil {
return fmt.Errorf("Error checking rule existence: %v", err)
}
if exists {
// if all the rules already exist, no need to do anything
return nil
}
// Otherwise, teardown all the rules and set them up again
// We do this because the order of the rules is important
log.Info("Some iptables rules are missing; deleting and recreating rules")
teardownIPTables(ipt, rules)
if err = setupIPTables(ipt, rules); err != nil {
return fmt.Errorf("Error setting up rules: %v", err)
}
return nil
}

func setupIPTables(ipt IPTables, rules []IPTablesRule) error {
for _, rule := range rules {
log.Info("Adding iptables rule: ", strings.Join(rule.rulespec, " "))
err := ipt.AppendUnique(rule.table, rule.chain, rule.rulespec...)
if err != nil {
return fmt.Errorf("failed to insert IPTables rule: %v", err)
}
}

return nil
}

func teardownIPTables(ipt IPTables, rules []IPTablesRule) {
for _, rule := range rules {
log.Info("Deleting iptables rule: ", strings.Join(rule.rulespec, " "))
// We ignore errors here because if there's an error it's almost certainly because the rule
// doesn't exist, which is fine (we don't need to delete rules that don't exist)
ipt.Delete(rule.table, rule.chain, rule.rulespec...)
}
}
35 changes: 15 additions & 20 deletions network/ipmasq_test.go → network/iptables_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@
package network

import (
"github.com/coreos/flannel/pkg/ip"
"github.com/coreos/flannel/subnet"
"net"
"reflect"
"testing"

"github.com/coreos/flannel/pkg/ip"
"github.com/coreos/flannel/subnet"
)

func lease() *subnet.Lease {
Expand All @@ -29,14 +30,8 @@ func lease() *subnet.Lease {
}
}

type MockIPTablesRule struct {
table string
chain string
rulespec []string
}

type MockIPTables struct {
rules []MockIPTablesRule
rules []IPTablesRule
}

func (mock *MockIPTables) ruleIndex(table string, chain string, rulespec []string) int {
Expand Down Expand Up @@ -67,33 +62,33 @@ func (mock *MockIPTables) Exists(table string, chain string, rulespec ...string)
func (mock *MockIPTables) AppendUnique(table string, chain string, rulespec ...string) error {
var ruleIndex = mock.ruleIndex(table, chain, rulespec)
if ruleIndex == -1 {
mock.rules = append(mock.rules, MockIPTablesRule{table: table, chain: chain, rulespec: rulespec})
mock.rules = append(mock.rules, IPTablesRule{table: table, chain: chain, rulespec: rulespec})
}
return nil
}

func TestDeleteRules(t *testing.T) {
ipt := &MockIPTables{}
SetupIPMasq(ipt, ip.IP4Net{}, lease())
setupIPTables(ipt, MasqRules(ip.IP4Net{}, lease()))
if len(ipt.rules) != 4 {
t.Errorf("Should be 4 rules, there are actually %d: %#v", len(ipt.rules), ipt.rules)
t.Errorf("Should be 4 masqRules, there are actually %d: %#v", len(ipt.rules), ipt.rules)
}
TeardownIPMasq(ipt, ip.IP4Net{}, lease())
teardownIPTables(ipt, MasqRules(ip.IP4Net{}, lease()))
if len(ipt.rules) != 0 {
t.Errorf("Should be 0 rules, there are actually %d: %#v", len(ipt.rules), ipt.rules)
t.Errorf("Should be 0 masqRules, there are actually %d: %#v", len(ipt.rules), ipt.rules)
}
}

func TestEnsureRules(t *testing.T) {
// If any rules are missing, they should be all deleted and recreated in the correct order
// If any masqRules are missing, they should be all deleted and recreated in the correct order
ipt_correct := &MockIPTables{}
SetupIPMasq(ipt_correct, ip.IP4Net{}, lease())
// setup a mock instance where we delete some rules and run `EnsureIPMasq`
setupIPTables(ipt_correct, MasqRules(ip.IP4Net{}, lease()))
// setup a mock instance where we delete some masqRules and run `ensureIPTables`
ipt_recreate := &MockIPTables{}
SetupIPMasq(ipt_recreate, ip.IP4Net{}, lease())
setupIPTables(ipt_recreate, MasqRules(ip.IP4Net{}, lease()))
ipt_recreate.rules = ipt_recreate.rules[0:2]
EnsureIPMasq(ipt_recreate, ip.IP4Net{}, lease())
ensureIPTables(ipt_recreate, MasqRules(ip.IP4Net{}, lease()))
if !reflect.DeepEqual(ipt_recreate.rules, ipt_correct.rules) {
t.Errorf("iptables rules after EnsureIPMasq are incorrected. Expected: %#v, Actual: %#v", ipt_recreate.rules, ipt_correct.rules)
t.Errorf("iptables masqRules after ensureIPTables are incorrected. Expected: %#v, Actual: %#v", ipt_recreate.rules, ipt_correct.rules)
}
}

0 comments on commit 30fdb55

Please sign in to comment.