Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Netlink timeouts #1976

Merged
merged 2 commits into from
Dec 4, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions drivers/overlay/ov_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,12 @@ func (n *network) initSandbox(restore bool) error {
var nlSock *nl.NetlinkSocket
sbox.InvokeFunc(func() {
nlSock, err = nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
if err != nil {
return
}
// set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed
tv := syscall.NsecToTimeval(soTimeout.Nanoseconds())
err = nlSock.SetReceiveTimeout(&tv)
})
n.setNetlinkSocket(nlSock)

Expand All @@ -721,6 +727,11 @@ func (n *network) watchMiss(nlSock *nl.NetlinkSocket) {
// The netlink socket got closed, simply exit to not leak this goroutine
return
}
// When the receive timeout expires the receive will return EAGAIN
if err == syscall.EAGAIN {
// we continue here to avoid spam for timeouts
continue
}
logrus.Errorf("Failed to receive from netlink: %v ", err)
continue
}
Expand Down
36 changes: 36 additions & 0 deletions drivers/overlay/overlay_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package overlay

import (
"context"
"net"
"syscall"
"testing"
"time"

Expand All @@ -12,6 +14,7 @@ import (
"github.com/docker/libnetwork/driverapi"
"github.com/docker/libnetwork/netlabel"
_ "github.com/docker/libnetwork/testutils"
"github.com/vishvananda/netlink/nl"
)

func init() {
Expand Down Expand Up @@ -135,3 +138,36 @@ func TestOverlayType(t *testing.T) {
dt.d.Type())
}
}

// Test that the netlink socket close unblock the watchMiss to avoid deadlock
func TestNetlinkSocket(t *testing.T) {
// This is the same code used by the overlay driver to create the netlink interface
// for the watch miss
nlSock, err := nl.Subscribe(syscall.NETLINK_ROUTE, syscall.RTNLGRP_NEIGH)
if err != nil {
t.Fatal()
}
// set the receive timeout to not remain stuck on the RecvFrom if the fd gets closed
tv := syscall.NsecToTimeval(soTimeout.Nanoseconds())
err = nlSock.SetReceiveTimeout(&tv)
if err != nil {
t.Fatal()
}
n := &network{id: "testnetid"}
ch := make(chan error)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
go func() {
n.watchMiss(nlSock)
ch <- nil
}()
time.Sleep(5 * time.Second)
nlSock.Close()
select {
case <-ch:
case <-ctx.Done():
{
t.Fatalf("Timeout expired")
}
}
}
16 changes: 16 additions & 0 deletions ipvs/ipvs.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,19 @@ package ipvs
import (
"net"
"syscall"
"time"

"fmt"

"github.com/vishvananda/netlink/nl"
"github.com/vishvananda/netns"
)

const (
netlinkRecvSocketsTimeout = 3 * time.Second
netlinkSendSocketTimeout = 30 * time.Second
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is set to ah higher value to avoid timeout error in case the system is busy. I noticed that the error coming from the write failure to IPVS is not percolated up, so if the addLBBackend backend failed in the configuration, the error is not handled at all. I will follow up on this on a different PR though because the change will be potentially non trivial.

)

// Service defines an IPVS service in its entirety.
type Service struct {
// Virtual service address.
Expand Down Expand Up @@ -82,6 +89,15 @@ func New(path string) (*Handle, error) {
if err != nil {
return nil, err
}
// Add operation timeout to avoid deadlocks
tv := syscall.NsecToTimeval(netlinkSendSocketTimeout.Nanoseconds())
if err := sock.SetSendTimeout(&tv); err != nil {
return nil, err
}
tv = syscall.NsecToTimeval(netlinkRecvSocketsTimeout.Nanoseconds())
if err := sock.SetReceiveTimeout(&tv); err != nil {
return nil, err
}

return &Handle{sock: sock}, nil
}
Expand Down
11 changes: 7 additions & 4 deletions ipvs/netlink.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,6 @@ func newGenlRequest(familyID int, cmd uint8) *nl.NetlinkRequest {
}

func execute(s *nl.NetlinkSocket, req *nl.NetlinkRequest, resType uint16) ([][]byte, error) {
var (
err error
)

if err := s.Send(req); err != nil {
return nil, err
}
Expand All @@ -222,6 +218,13 @@ done:
for {
msgs, err := s.Receive()
if err != nil {
if s.GetFd() == -1 {
return nil, fmt.Errorf("Socket got closed on receive")
}
if err == syscall.EAGAIN {
// timeout fired
continue
}
return nil, err
}
for _, m := range msgs {
Expand Down
2 changes: 1 addition & 1 deletion vendor.conf
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ github.com/sirupsen/logrus v1.0.3
github.com/stretchr/testify dab07ac62d4905d3e48d17dc549c684ac3b7c15a
github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16
github.com/ugorji/go f1f1a805ed361a0e078bb537e4ea78cd37dcf065
github.com/vishvananda/netlink bd6d5de5ccef2d66b0a26177928d0d8895d7f969
github.com/vishvananda/netlink b2de5d10e38ecce8607e6b438b6d174f389a004e
github.com/vishvananda/netns 604eaf189ee867d8c147fafc28def2394e878d25
golang.org/x/crypto 558b6879de74bc843225cde5686419267ff707ca
golang.org/x/net 7dcfb8076726a3fdd9353b6b8a1f1b6be6811bd6
Expand Down
10 changes: 6 additions & 4 deletions vendor/github.com/vishvananda/netlink/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

52 changes: 40 additions & 12 deletions vendor/github.com/vishvananda/netlink/addr_linux.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

115 changes: 115 additions & 0 deletions vendor/github.com/vishvananda/netlink/bridge_linux.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading