diff --git a/drivers/bridge/bridge.go b/drivers/bridge/bridge.go index a30140feae..94927003e9 100644 --- a/drivers/bridge/bridge.go +++ b/drivers/bridge/bridge.go @@ -133,6 +133,7 @@ type driver struct { natChain *iptables.ChainInfo filterChain *iptables.ChainInfo isolationChain *iptables.ChainInfo + mangleChain *iptables.ChainInfo networks map[string]*bridgeNetwork store datastore.DataStore nlh *netlink.Handle @@ -253,15 +254,15 @@ func (n *bridgeNetwork) registerIptCleanFunc(clean iptableCleanFunc) { n.iptCleanFuncs = append(n.iptCleanFuncs, clean) } -func (n *bridgeNetwork) getDriverChains() (*iptables.ChainInfo, *iptables.ChainInfo, *iptables.ChainInfo, error) { +func (n *bridgeNetwork) getDriverChains() (*iptables.ChainInfo, *iptables.ChainInfo, *iptables.ChainInfo, *iptables.ChainInfo, error) { n.Lock() defer n.Unlock() if n.driver == nil { - return nil, nil, nil, types.BadRequestErrorf("no driver found") + return nil, nil, nil, nil, types.BadRequestErrorf("no driver found") } - return n.driver.natChain, n.driver.filterChain, n.driver.isolationChain, nil + return n.driver.natChain, n.driver.filterChain, n.driver.isolationChain, n.driver.mangleChain, nil } func (n *bridgeNetwork) getNetworkBridgeName() string { @@ -360,6 +361,7 @@ func (d *driver) configure(option map[string]interface{}) error { natChain *iptables.ChainInfo filterChain *iptables.ChainInfo isolationChain *iptables.ChainInfo + mangleChain *iptables.ChainInfo ) genericData, ok := option[netlabel.GenericData] @@ -394,7 +396,7 @@ func (d *driver) configure(option map[string]interface{}) error { } } removeIPChains() - natChain, filterChain, isolationChain, err = setupIPChains(config) + natChain, filterChain, isolationChain, mangleChain, err = setupIPChains(config) if err != nil { return err } @@ -406,6 +408,7 @@ func (d *driver) configure(option map[string]interface{}) error { d.natChain = natChain d.filterChain = filterChain d.isolationChain = isolationChain + d.mangleChain = mangleChain d.config = config d.Unlock() @@ -587,7 +590,7 @@ func (d *driver) CreateNetwork(id string, option map[string]interface{}, nInfo d } d.Unlock() - // Parse and validate the config. It should not be conflict with existing networks' config + // Parse and validate the config. It should not conflict with existing networks' config config, err := parseNetworkOptions(id, option) if err != nil { return err diff --git a/drivers/bridge/bridge_test.go b/drivers/bridge/bridge_test.go index a44a595968..033db78541 100644 --- a/drivers/bridge/bridge_test.go +++ b/drivers/bridge/bridge_test.go @@ -994,7 +994,7 @@ func TestCleanupIptableRules(t *testing.T) { {Name: DockerChain, Table: iptables.Filter}, {Name: IsolationChain, Table: iptables.Filter}, } - if _, _, _, err := setupIPChains(&configuration{EnableIPTables: true}); err != nil { + if _, _, _, _, err := setupIPChains(&configuration{EnableIPTables: true}); err != nil { t.Fatalf("Error setting up ip chains: %v", err) } for _, chainInfo := range bridgeChain { diff --git a/drivers/bridge/setup_ip_tables.go b/drivers/bridge/setup_ip_tables.go index 862d9e4491..f47fb1de9d 100644 --- a/drivers/bridge/setup_ip_tables.go +++ b/drivers/bridge/setup_ip_tables.go @@ -14,17 +14,17 @@ const ( IsolationChain = "DOCKER-ISOLATION" ) -func setupIPChains(config *configuration) (*iptables.ChainInfo, *iptables.ChainInfo, *iptables.ChainInfo, error) { +func setupIPChains(config *configuration) (*iptables.ChainInfo, *iptables.ChainInfo, *iptables.ChainInfo, *iptables.ChainInfo, error) { // Sanity check. if config.EnableIPTables == false { - return nil, nil, nil, fmt.Errorf("cannot create new chains, EnableIPTable is disabled") + return nil, nil, nil, nil, fmt.Errorf("cannot create new chains, EnableIPTable is disabled") } hairpinMode := !config.EnableUserlandProxy natChain, err := iptables.NewChain(DockerChain, iptables.Nat, hairpinMode) if err != nil { - return nil, nil, nil, fmt.Errorf("failed to create NAT chain: %v", err) + return nil, nil, nil, nil, fmt.Errorf("failed to create NAT chain: %v", err) } defer func() { if err != nil { @@ -36,7 +36,7 @@ func setupIPChains(config *configuration) (*iptables.ChainInfo, *iptables.ChainI filterChain, err := iptables.NewChain(DockerChain, iptables.Filter, false) if err != nil { - return nil, nil, nil, fmt.Errorf("failed to create FILTER chain: %v", err) + return nil, nil, nil, nil, fmt.Errorf("failed to create FILTER chain: %v", err) } defer func() { if err != nil { @@ -48,14 +48,26 @@ func setupIPChains(config *configuration) (*iptables.ChainInfo, *iptables.ChainI isolationChain, err := iptables.NewChain(IsolationChain, iptables.Filter, false) if err != nil { - return nil, nil, nil, fmt.Errorf("failed to create FILTER isolation chain: %v", err) + return nil, nil, nil, nil, fmt.Errorf("failed to create FILTER isolation chain: %v", err) } + defer func() { + if err != nil { + if err := iptables.RemoveExistingChain(IsolationChain, iptables.Filter); err != nil { + logrus.Warnf("failed on removing iptables NAT chain on cleanup: %v", err) + } + } + }() if err := addReturnRule(IsolationChain); err != nil { - return nil, nil, nil, err + return nil, nil, nil, nil, err } - return natChain, filterChain, isolationChain, nil + mangleChain, err := iptables.NewChain(DockerChain, iptables.Mangle, false) + if err != nil { + return nil, nil, nil, nil, err + } + + return natChain, filterChain, isolationChain, mangleChain, nil } func (n *bridgeNetwork) setupIPTables(config *networkConfiguration, i *bridgeInterface) error { @@ -92,7 +104,7 @@ func (n *bridgeNetwork) setupIPTables(config *networkConfiguration, i *bridgeInt n.registerIptCleanFunc(func() error { return setupIPTablesInternal(config.BridgeName, maskedAddrv4, config.EnableICC, config.EnableIPMasquerade, hairpinMode, false) }) - natChain, filterChain, _, err := n.getDriverChains() + natChain, filterChain, _, mangleChain, err := n.getDriverChains() if err != nil { return fmt.Errorf("Failed to setup IP tables, cannot acquire chain info %s", err.Error()) } @@ -102,6 +114,11 @@ func (n *bridgeNetwork) setupIPTables(config *networkConfiguration, i *bridgeInt return fmt.Errorf("Failed to program NAT chain: %s", err.Error()) } + err = iptables.ProgramChain(mangleChain, config.BridgeName, hairpinMode, true) + if err != nil { + return fmt.Errorf("Failed to program NAT chain: %s", err.Error()) + } + err = iptables.ProgramChain(filterChain, config.BridgeName, hairpinMode, true) if err != nil { return fmt.Errorf("Failed to program FILTER chain: %s", err.Error()) @@ -131,12 +148,13 @@ type iptRule struct { func setupIPTablesInternal(bridgeIface string, addr net.Addr, icc, ipmasq, hairpin, enable bool) error { var ( - address = addr.String() - natRule = iptRule{table: iptables.Nat, chain: "POSTROUTING", preArgs: []string{"-t", "nat"}, args: []string{"-s", address, "!", "-o", bridgeIface, "-j", "MASQUERADE"}} - hpNatRule = iptRule{table: iptables.Nat, chain: "POSTROUTING", preArgs: []string{"-t", "nat"}, args: []string{"-m", "addrtype", "--src-type", "LOCAL", "-o", bridgeIface, "-j", "MASQUERADE"}} - skipDNAT = iptRule{table: iptables.Nat, chain: DockerChain, preArgs: []string{"-t", "nat"}, args: []string{"-i", bridgeIface, "-j", "RETURN"}} - outRule = iptRule{table: iptables.Filter, chain: "FORWARD", args: []string{"-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}} - inRule = iptRule{table: iptables.Filter, chain: "FORWARD", args: []string{"-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"}} + address = addr.String() + natRule = iptRule{table: iptables.Nat, chain: "POSTROUTING", preArgs: []string{"-t", "nat"}, args: []string{"-s", address, "!", "-o", bridgeIface, "-j", "MASQUERADE"}} + hpNatRule = iptRule{table: iptables.Nat, chain: "POSTROUTING", preArgs: []string{"-t", "nat"}, args: []string{"-m", "addrtype", "--src-type", "LOCAL", "-o", bridgeIface, "-j", "MASQUERADE"}} + skipDNAT = iptRule{table: iptables.Nat, chain: DockerChain, preArgs: []string{"-t", "nat"}, args: []string{"-i", bridgeIface, "-j", "RETURN"}} + skipMangle = iptRule{table: iptables.Mangle, chain: DockerChain, preArgs: []string{"-t", "mangle"}, args: []string{"-i", bridgeIface, "-j", "RETURN"}} + outRule = iptRule{table: iptables.Filter, chain: "FORWARD", args: []string{"-i", bridgeIface, "!", "-o", bridgeIface, "-j", "ACCEPT"}} + inRule = iptRule{table: iptables.Filter, chain: "FORWARD", args: []string{"-o", bridgeIface, "-m", "conntrack", "--ctstate", "RELATED,ESTABLISHED", "-j", "ACCEPT"}} ) // Set NAT. @@ -150,6 +168,9 @@ func setupIPTablesInternal(bridgeIface string, addr net.Addr, icc, ipmasq, hairp if err := programChainRule(skipDNAT, "SKIP DNAT", enable); err != nil { return err } + if err := programChainRule(skipMangle, "SKIP MANGLE", enable); err != nil { + return err + } } // In hairpin mode, masquerade traffic from localhost @@ -324,6 +345,7 @@ func ensureJumpRule(fromChain, toChain string) error { func removeIPChains() { for _, chainInfo := range []iptables.ChainInfo{ {Name: DockerChain, Table: iptables.Nat}, + {Name: DockerChain, Table: iptables.Mangle}, {Name: DockerChain, Table: iptables.Filter}, {Name: IsolationChain, Table: iptables.Filter}, } { diff --git a/drivers/bridge/setup_ip_tables_test.go b/drivers/bridge/setup_ip_tables_test.go index edfb2907fc..789dd8132c 100644 --- a/drivers/bridge/setup_ip_tables_test.go +++ b/drivers/bridge/setup_ip_tables_test.go @@ -116,7 +116,7 @@ func assertIPTableChainProgramming(rule iptRule, descr string, t *testing.T) { func assertChainConfig(d *driver, t *testing.T) { var err error - d.natChain, d.filterChain, d.isolationChain, err = setupIPChains(d.config) + d.natChain, d.filterChain, d.isolationChain, d.mangleChain, err = setupIPChains(d.config) if err != nil { t.Fatal(err) } diff --git a/iptables/firewalld_test.go b/iptables/firewalld_test.go index 1ac11a9adf..e44440fc97 100644 --- a/iptables/firewalld_test.go +++ b/iptables/firewalld_test.go @@ -20,6 +20,9 @@ func TestReloaded(t *testing.T) { var fwdChain *ChainInfo fwdChain, err = NewChain("FWD", Filter, false) + if err != nil { + t.Fatal(err) + } bridgeName := "lo" err = ProgramChain(fwdChain, bridgeName, false, true) diff --git a/iptables/iptables.go b/iptables/iptables.go index b7fe816261..740c08e006 100644 --- a/iptables/iptables.go +++ b/iptables/iptables.go @@ -133,12 +133,12 @@ func ProgramChain(c *ChainInfo, bridgeName string, hairpinMode, enable bool) err "--dst-type", "LOCAL", "-j", c.Name} if !Exists(Nat, "PREROUTING", preroute...) && enable { - if err := c.Prerouting(Append, preroute...); err != nil { - return fmt.Errorf("Failed to inject docker in PREROUTING chain: %s", err) + if err := c.Prerouting(Nat, Append, preroute...); err != nil { + return fmt.Errorf("Failed to inject docker in PREROUTING nat chain: %v", err) } } else if Exists(Nat, "PREROUTING", preroute...) && !enable { - if err := c.Prerouting(Delete, preroute...); err != nil { - return fmt.Errorf("Failed to remove docker in PREROUTING chain: %s", err) + if err := c.Prerouting(Nat, Delete, preroute...); err != nil { + return fmt.Errorf("Failed to remove docker in PREROUTING nat chain: %v", err) } } output := []string{ @@ -157,6 +157,21 @@ func ProgramChain(c *ChainInfo, bridgeName string, hairpinMode, enable bool) err return fmt.Errorf("Failed to inject docker in OUTPUT chain: %s", err) } } + case Mangle: + preroute := []string{ + "-m", "addrtype", "--dst-type", "LOCAL", + "-j", c.Name, + "!", "-d", "127.0.0.0/8", + } + if !Exists(Mangle, "PREROUTING", preroute...) && enable { + if err := c.Prerouting(Mangle, Append, preroute...); err != nil { + return fmt.Errorf("Failed to inject docker in PREROUTING mangle chain: %s", err) + } + } else if Exists(Mangle, "PREROUTING", preroute...) && !enable { + if err := c.Prerouting(Mangle, Delete, preroute...); err != nil { + return fmt.Errorf("Failed to remove docker in PREROUTING mangle chain: %s", err) + } + } case Filter: if bridgeName == "" { return fmt.Errorf("Could not program chain %s/%s, missing bridge name.", @@ -197,8 +212,8 @@ func RemoveExistingChain(name string, table Table) error { return c.Remove() } -// Forward adds forwarding rule to 'filter' table and corresponding nat rule to 'nat' table. -func (c *ChainInfo) Forward(action Action, ip net.IP, port int, proto, destAddr string, destPort int, bridgeName string) error { +// Mark sets up a iptables mangle rule to mark an incoming packet with the given mark value +func (c *ChainInfo) Mark(action Action, proto string, ip net.IP, port, mark int) error { daddr := ip.String() if ip.IsUnspecified() { // iptables interprets "0.0.0.0" as "0.0.0.0/32", whereas we @@ -207,42 +222,100 @@ func (c *ChainInfo) Forward(action Action, ip net.IP, port int, proto, destAddr daddr = "0/0" } - args := []string{ + args := []string{"-t", string(Mangle), string(action), c.Name, "-p", proto, "-d", daddr, "--dport", strconv.Itoa(port), + "-j", "MARK", + "--set-mark", strconv.Itoa(mark), + } + + output, err := Raw(args...) + if err != nil { + return err + } + if len(output) != 0 { + return ChainError{Chain: "PREROUTING", Output: output} + } + return nil +} + +// Forward adds forwarding rule to 'filter' table and corresponding nat rule to 'nat' table. +func (c *ChainInfo) Forward(action Action, ip net.IP, port int, proto string, newIP net.IP, newPort int, bridgeName string) error { + args := []string{"-t", string(Nat), string(action), c.Name, + "-p", proto, + "-d", ip.String(), + "--dport", strconv.Itoa(port), "-j", "DNAT", - "--to-destination", net.JoinHostPort(destAddr, strconv.Itoa(destPort))} + "--to-destination", net.JoinHostPort(newIP.String(), strconv.Itoa(newPort))} if !c.HairpinMode { args = append(args, "!", "-i", bridgeName) } - if err := ProgramRule(Nat, c.Name, action, args); err != nil { + + output, err := Raw(args...) + if err != nil { return err } + if len(output) != 0 { + return ChainError{Chain: "FORWARD", Output: output} + } - args = []string{ + output, err = Raw("-t", string(Filter), string(action), c.Name, "!", "-i", bridgeName, "-o", bridgeName, "-p", proto, - "-d", destAddr, - "--dport", strconv.Itoa(destPort), - "-j", "ACCEPT", + "-d", newIP.String(), + "--dport", strconv.Itoa(newPort), + "-j", "ACCEPT") + if err != nil { + return err } - if err := ProgramRule(Filter, c.Name, action, args); err != nil { + if len(output) != 0 { + return ChainError{Chain: "FORWARD", Output: output} + } + return nil +} + +// Masq sets up iptables masquerade rules for the given address/proto +func (c *ChainInfo) Masq(action Action, proto string, ip net.IP, port int) error { + output, err := Raw("-t", string(Nat), string(action), "POSTROUTING", + "-p", proto, + "-s", ip.String(), + "-d", ip.String(), + "--dport", strconv.Itoa(port), + "-j", "MASQUERADE") + if err != nil { return err } + if len(output) != 0 { + return ChainError{Chain: "FORWARD", Output: output} + } + return nil +} - args = []string{ +// Hairpin sets required iptables rules for hairpin nating +func (c *ChainInfo) Hairpin(action Action, proto string, destIP net.IP, destPort int, sourceIP net.IP, newPort int) error { + destAddr := destIP.String() + if destIP.IsUnspecified() { + // iptables interprets "0.0.0.0" as "0.0.0.0/32", whereas we + // want "0.0.0.0/0". "0/0" is correctly interpreted as "any + // value" by both iptables and ip6tables. + destAddr = "0/0" + } + output, err := Raw("-t", string(Nat), string(action), c.Name, "-p", proto, - "-s", destAddr, + "-s", sourceIP.String(), "-d", destAddr, "--dport", strconv.Itoa(destPort), - "-j", "MASQUERADE", - } - if err := ProgramRule(Nat, "POSTROUTING", action, args); err != nil { + "-j", "DNAT", + "--to-destination", net.JoinHostPort(sourceIP.String(), strconv.Itoa(newPort)), + ) + if err != nil { return err } - + if len(output) > 0 { + return ChainError{Chain: "PREROUTING", Output: output} + } return nil } @@ -281,8 +354,8 @@ func ProgramRule(table Table, chain string, action Action, args []string) error } // Prerouting adds linking rule to nat/PREROUTING chain. -func (c *ChainInfo) Prerouting(action Action, args ...string) error { - a := []string{"-t", string(Nat), string(action), "PREROUTING"} +func (c *ChainInfo) Prerouting(table Table, action Action, args ...string) error { + a := []string{"-t", string(table), string(action), "PREROUTING"} if len(args) > 0 { a = append(a, args...) } @@ -311,13 +384,18 @@ func (c *ChainInfo) Output(action Action, args ...string) error { // Remove removes the chain. func (c *ChainInfo) Remove() error { // Ignore errors - This could mean the chains were never set up - if c.Table == Nat { - c.Prerouting(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) + switch c.Table { + case Nat: + c.Prerouting(c.Table, Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", "127.0.0.0/8", "-j", c.Name) c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) // Created in versions <= 0.1.6 - c.Prerouting(Delete) + c.Prerouting(c.Table, Delete) c.Output(Delete) + case Mangle: + c.Prerouting(Mangle, Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name, "!", "-d", "127.0.0.0/8") + Raw("-t", string(Mangle), "-F", c.Name) + Raw("-t", string(Mangle), "-X", c.Name) } Raw("-t", string(c.Table), "-F", c.Name) Raw("-t", string(c.Table), "-X", c.Name) diff --git a/iptables/iptables_test.go b/iptables/iptables_test.go index 324d123da1..a453ea4ef3 100644 --- a/iptables/iptables_test.go +++ b/iptables/iptables_test.go @@ -15,6 +15,7 @@ const chainName = "DOCKEREST" var natChain *ChainInfo var filterChain *ChainInfo +var mangleChain *ChainInfo var bridgeName string func TestNewChain(t *testing.T) { @@ -22,6 +23,9 @@ func TestNewChain(t *testing.T) { bridgeName = "lo" natChain, err = NewChain(chainName, Nat, false) + if err != nil { + t.Fatal(err) + } err = ProgramChain(natChain, bridgeName, false, true) if err != nil { t.Fatal(err) @@ -32,12 +36,18 @@ func TestNewChain(t *testing.T) { if err != nil { t.Fatal(err) } + + mangleChain, err = NewChain(chainName, Mangle, false) + err = ProgramChain(mangleChain, bridgeName, false, true) + if err != nil { + t.Fatal(err) + } } func TestForward(t *testing.T) { ip := net.ParseIP("192.168.1.1") port := 1234 - dstAddr := "172.17.0.1" + dstAddr := net.ParseIP("172.17.0.1") dstPort := 4321 proto := "tcp" @@ -52,7 +62,7 @@ func TestForward(t *testing.T) { "-p", proto, "--dport", strconv.Itoa(port), "-j", "DNAT", - "--to-destination", dstAddr + ":" + strconv.Itoa(dstPort), + "--to-destination", dstAddr.String() + ":" + strconv.Itoa(dstPort), "!", "-i", bridgeName, } @@ -63,7 +73,7 @@ func TestForward(t *testing.T) { filterRule := []string{ "!", "-i", bridgeName, "-o", bridgeName, - "-d", dstAddr, + "-d", dstAddr.String(), "-p", proto, "--dport", strconv.Itoa(dstPort), "-j", "ACCEPT", @@ -73,11 +83,23 @@ func TestForward(t *testing.T) { t.Fatalf("filter rule does not exist") } +} + +func TestMasq(t *testing.T) { + ip := net.ParseIP("192.168.1.1") + port := 1234 + proto := "tcp" + + err := natChain.Masq(Insert, proto, ip, port) + if err != nil { + t.Fatal(err) + } + masqRule := []string{ - "-d", dstAddr, - "-s", dstAddr, + "-d", ip.String(), + "-s", ip.String(), "-p", proto, - "--dport", strconv.Itoa(dstPort), + "--dport", strconv.Itoa(port), "-j", "MASQUERADE", } @@ -132,7 +154,7 @@ func TestPrerouting(t *testing.T) { "-i", "lo", "-d", "192.168.1.1"} - err := natChain.Prerouting(Insert, args...) + err := natChain.Prerouting(Nat, Insert, args...) if err != nil { t.Fatal(err) } @@ -190,7 +212,7 @@ func RunConcurrencyTest(t *testing.T, allowXlock bool) { ip := net.ParseIP("192.168.1.1") port := 1234 - dstAddr := "172.17.0.1" + dstAddr := net.ParseIP("172.17.0.1") dstPort := 4321 proto := "tcp" @@ -226,6 +248,12 @@ func TestCleanup(t *testing.T) { t.Fatal(err) } + mangleChain.Remove() + err = RemoveExistingChain(chainName, Mangle) + if err != nil { + t.Fatal(err) + } + rules, err = exec.Command("iptables-save").Output() if err != nil { t.Fatal(err) diff --git a/portmapper/mapper.go b/portmapper/mapper.go index 7f2a67c89f..a4107161d3 100644 --- a/portmapper/mapper.go +++ b/portmapper/mapper.go @@ -8,7 +8,9 @@ import ( "github.com/Sirupsen/logrus" "github.com/docker/libnetwork/iptables" + "github.com/docker/libnetwork/ipvs" "github.com/docker/libnetwork/portallocator" + "github.com/vishvananda/netlink" ) type mapping struct { @@ -18,7 +20,10 @@ type mapping struct { container net.Addr } -var newProxy = newProxyCommand +var ( + newProxy = newProxyCommand + loopbackIP = net.ParseIP("127.0.0.1") +) var ( // ErrUnknownBackendAddressType refers to an unknown container or unsupported address type @@ -41,6 +46,7 @@ type PortMapper struct { proxyPath string Allocator *portallocator.PortAllocator + ipvs *ipvs.Handle } // New returns a new instance of PortMapper @@ -50,11 +56,19 @@ func New(proxyPath string) *PortMapper { // NewWithPortAllocator returns a new instance of PortMapper which will use the specified PortAllocator func NewWithPortAllocator(allocator *portallocator.PortAllocator, proxyPath string) *PortMapper { - return &PortMapper{ + pm := &PortMapper{ currentMappings: make(map[string]*mapping), Allocator: allocator, proxyPath: proxyPath, } + ipvs, err := ipvs.New("") + if err != nil { + // this should never happen unless something is really wrong, and probably + // other things are broken too. + panic("error setting up ipvs: " + err.Error()) + } + pm.ipvs = ipvs + return pm } // SetIptablesChain sets the specified chain into portmapper @@ -79,7 +93,7 @@ func (pm *PortMapper) MapRange(container net.Addr, hostIP net.IP, hostPortStart, allocatedHostPort int ) - switch container.(type) { + switch addr := container.(type) { case *net.TCPAddr: proto = "tcp" if allocatedHostPort, err = pm.Allocator.RequestPortInRange(hostIP, proto, hostPortStart, hostPortEnd); err != nil { @@ -137,28 +151,41 @@ func (pm *PortMapper) MapRange(container net.Addr, hostIP net.IP, hostPortStart, } containerIP, containerPort := getIPAndPort(m.container) - if hostIP.To4() != nil { - if err := pm.forward(iptables.Append, m.proto, hostIP, allocatedHostPort, containerIP.String(), containerPort); err != nil { - return nil, err - } - } + fwMark := getFWMark(m.proto, allocatedHostPort) cleanup := func() error { // need to undo the iptables rules before we return m.userlandProxy.Stop() - if hostIP.To4() != nil { - pm.forward(iptables.Delete, m.proto, hostIP, allocatedHostPort, containerIP.String(), containerPort) - if err := pm.Allocator.ReleasePort(hostIP, m.proto, allocatedHostPort); err != nil { - return err + if !hostIP.IsLoopback() { + pm.deleteService(fwMark) + } + pm.forward(iptables.Delete, m.proto, hostIP, allocatedHostPort, containerIP, containerPort, fwMark) + if err := pm.Allocator.ReleasePort(hostIP, m.proto, allocatedHostPort); err != nil { + return err + } + return nil + } + + // loopback can't use ipvs + if !hostIP.IsLoopback() { + if err := pm.createService(allocatedHostPort, containerIP, containerPort, fwMark); err != nil { + if err := cleanup(); err != nil { + logrus.Warnf("Error while cleaning up port forwards: %v", err) } } + } - return nil + // Need to setup special routing for localhost which can't go through ipvs + if err := pm.forward(iptables.Append, m.proto, hostIP, allocatedHostPort, containerIP, containerPort, fwMark); err != nil { + if err := cleanup(); err != nil { + logrus.Warnf("Error while cleaning up port forwards: %v", err) + } + return nil, err } if err := m.userlandProxy.Start(); err != nil { if err := cleanup(); err != nil { - return nil, fmt.Errorf("Error during port allocation cleanup: %v", err) + logrus.Errorf("Error during port allocation cleanup: %v", err) } return nil, err } @@ -186,8 +213,16 @@ func (pm *PortMapper) Unmap(host net.Addr) error { containerIP, containerPort := getIPAndPort(data.container) hostIP, hostPort := getIPAndPort(data.host) - if err := pm.forward(iptables.Delete, data.proto, hostIP, hostPort, containerIP.String(), containerPort); err != nil { - logrus.Errorf("Error on iptables delete: %s", err) + fwMark := getFWMark(data.proto, hostPort) + + if err := pm.forward(iptables.Delete, data.proto, hostIP, hostPort, containerIP, containerPort, fwMark); err != nil { + logrus.Errorf("Error cleaning up port map: %s", err) + } + + if !hostIP.IsLoopback() { + if err := pm.deleteService(fwMark); err != nil { + logrus.Errorf("Error removing port map: %s", err) + } } switch a := host.(type) { @@ -200,6 +235,7 @@ func (pm *PortMapper) Unmap(host net.Addr) error { } //ReMapAll will re-apply all port mappings +// this is only used by firewalld func (pm *PortMapper) ReMapAll() { pm.lock.Lock() defer pm.lock.Unlock() @@ -207,7 +243,9 @@ func (pm *PortMapper) ReMapAll() { for _, data := range pm.currentMappings { containerIP, containerPort := getIPAndPort(data.container) hostIP, hostPort := getIPAndPort(data.host) - if err := pm.forward(iptables.Append, data.proto, hostIP, hostPort, containerIP.String(), containerPort); err != nil { + fwMark := getFWMark(data.proto, containerPort) + + if err := pm.forward(iptables.Append, data.proto, hostIP, hostPort, containerIP, containerPort, fwMark); err != nil { logrus.Errorf("Error on iptables add: %s", err) } } @@ -233,9 +271,73 @@ func getIPAndPort(a net.Addr) (net.IP, int) { return nil, 0 } -func (pm *PortMapper) forward(action iptables.Action, proto string, sourceIP net.IP, sourcePort int, containerIP string, containerPort int) error { +func (pm *PortMapper) forward(action iptables.Action, proto string, hostIP net.IP, hostPort int, containerIP net.IP, containerPort int, mark int) error { if pm.chain == nil { return nil } - return pm.chain.Forward(action, sourceIP, sourcePort, proto, containerIP, containerPort, pm.bridgeName) + isLoopback := hostIP.IsLoopback() + isUnspec := hostIP.IsUnspecified() + + if !isLoopback { + // Can't use ipvs for loopback, so no need to mark + if err := pm.chain.Mark(action, proto, hostIP, hostPort, mark); err != nil { + return err + } + } + + // with ipvs, we only need to forward on localhost and the actual hairpin + if pm.chain.HairpinMode { + if isUnspec || isLoopback { + if err := pm.chain.Forward(action, loopbackIP, hostPort, proto, containerIP, containerPort, pm.bridgeName); err != nil { + return err + } + } + + if !isLoopback { + // allow the container to talk to itself over the nat'd address + // ipvs does not support hairpin + if err := pm.chain.Hairpin(action, proto, hostIP, hostPort, containerIP, containerPort); err != nil { + return err + } + } + } + return pm.chain.Masq(action, proto, containerIP, containerPort) +} + +func (pm *PortMapper) createService(hostPort int, containerIP net.IP, containerPort, mark int) error { + service := &ipvs.Service{ + FWMark: uint32(mark), + AddressFamily: netlink.FAMILY_V4, + SchedName: ipvs.LeastConnection, + } + if err := pm.ipvs.NewService(service); err != nil { + return err + } + dest := &ipvs.Destination{ + AddressFamily: netlink.FAMILY_V4, + Address: containerIP, + Port: uint16(containerPort), + ConnectionFlags: ipvs.ConnectionFlagMasq, + Weight: 1, + } + return pm.ipvs.NewDestination(service, dest) +} + +func (pm *PortMapper) deleteService(mark int) error { + return pm.ipvs.DelService(&ipvs.Service{ + FWMark: uint32(mark), + AddressFamily: netlink.FAMILY_V4, + SchedName: ipvs.LeastConnection, + }) +} + +func getFWMark(proto string, port int) int { + var prefix int + switch proto { + case "udp": + prefix = 5500000 + case "tcp": + prefix = 5600000 + } + return prefix + port }