-
Notifications
You must be signed in to change notification settings - Fork 712
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add eBPF connection tracking without dependencies on kernel headers
Based on work from Lorenzo, updated by Iago, Alban, Alessandro and Michael. This PR adds connection tracking using eBPF. This feature is not enabled by default. For now, you can enable it by launching scope with the following command: ``` sudo ./scope launch --probe.ebpf.connections=true ``` This patch allows scope to get notified of every connection event, without relying on the parsing of /proc/$pid/net/tcp{,6} and /proc/$pid/fd/*, and therefore improve performance. We vendor https://github.com/iovisor/gobpf in Scope to load the pre-compiled ebpf program and https://github.com/weaveworks/tcptracer-bpf to guess the offsets of the structures we need in the kernel. In this way we don't need a different pre-compiled ebpf object file per kernel. The pre-compiled ebpf program is included in the vendoring of tcptracer-bpf. The ebpf program uses kprobes/kretprobes on the following kernel functions: - tcp_v4_connect - tcp_v6_connect - tcp_set_state - inet_csk_accept - tcp_close It generates "connect", "accept" and "close" events containing the connection tuple but also pid and netns. Note: the IPv6 events are not supported in Scope and thus not passed on. probe/endpoint/ebpf.go maintains the list of connections. Similarly to conntrack, it also keeps the dead connections for one iteration in order to report short-lived connections. The code for parsing /proc/$pid/net/tcp{,6} and /proc/$pid/fd/* is still there and still used at start-up because eBPF only brings us the events and not the initial state. However, the /proc parsing for the initial state is now done in foreground instead of background, via newForegroundReader(). NAT resolution on connections from eBPF works in the same way as it did on connections from /proc: by using conntrack. One of the two conntrack instances is only started to get the initial state and then it is stopped since eBPF detects short-lived connections. The Scope Docker image size comparison: - weaveworks/scope in current master: 22 MB (compressed), 68 MB (uncompressed) - weaveworks/scope with this patchset: 23 MB (compressed), 69 MB (uncompressed) Fixes #1168 (walking /proc to obtain connections is very expensive) Fixes #1260 (Short-lived connections not tracked for containers in shared networking namespaces) Fixes #1962 (Port ebpf tracker to Go) Fixes #1961 (Remove runtime kernel header dependency from ebpf tracker)
- Loading branch information
Showing
18 changed files
with
878 additions
and
191 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,250 @@ | ||
package endpoint | ||
|
||
import ( | ||
"strconv" | ||
|
||
log "github.com/Sirupsen/logrus" | ||
"github.com/weaveworks/scope/probe/endpoint/procspy" | ||
"github.com/weaveworks/scope/probe/process" | ||
"github.com/weaveworks/scope/report" | ||
) | ||
|
||
// connectionTrackerConfig are the config options for the endpoint tracker. | ||
type connectionTrackerConfig struct { | ||
HostID string | ||
HostName string | ||
SpyProcs bool | ||
UseConntrack bool | ||
WalkProc bool | ||
UseEbpfConn bool | ||
ProcRoot string | ||
BufferSize int | ||
Scanner procspy.ConnectionScanner | ||
DNSSnooper *DNSSnooper | ||
} | ||
|
||
type connectionTracker struct { | ||
conf connectionTrackerConfig | ||
flowWalker flowWalker // Interface | ||
ebpfTracker eventTracker | ||
reverseResolver *reverseResolver | ||
processCache *process.CachingWalker | ||
} | ||
|
||
func newConnectionTracker(conf connectionTrackerConfig) connectionTracker { | ||
if !conf.UseEbpfConn { | ||
// ebpf OFF, use flowWalker | ||
return connectionTracker{ | ||
conf: conf, | ||
flowWalker: newConntrackFlowWalker(conf.UseConntrack, conf.ProcRoot, conf.BufferSize, "--any-nat"), | ||
ebpfTracker: nil, | ||
reverseResolver: newReverseResolver(), | ||
} | ||
} | ||
// When ebpf will be active by default, check if it starts correctly otherwise fallback to flowWalk | ||
et, err := newEbpfTracker(conf.UseEbpfConn) | ||
if err != nil { | ||
// TODO: fallback to flowWalker, when ebpf is enabled by default | ||
log.Errorf("Error setting up the ebpfTracker, connections will not be reported: %s", err) | ||
noopConnectionTracker := connectionTracker{ | ||
conf: conf, | ||
flowWalker: nil, | ||
ebpfTracker: nil, | ||
reverseResolver: nil, | ||
} | ||
return noopConnectionTracker | ||
} | ||
|
||
var processCache *process.CachingWalker | ||
processCache = process.NewCachingWalker(process.NewWalker(conf.ProcRoot)) | ||
processCache.Tick() | ||
|
||
ct := connectionTracker{ | ||
conf: conf, | ||
flowWalker: nil, | ||
ebpfTracker: et, | ||
reverseResolver: newReverseResolver(), | ||
processCache: processCache, | ||
} | ||
go ct.getInitialState() | ||
return ct | ||
} | ||
|
||
func flowToTuple(f flow) (ft fourTuple) { | ||
ft = fourTuple{ | ||
f.Original.Layer3.SrcIP, | ||
f.Original.Layer3.DstIP, | ||
uint16(f.Original.Layer4.SrcPort), | ||
uint16(f.Original.Layer4.DstPort), | ||
} | ||
// Handle DNAT-ed connections in the initial state | ||
if f.Original.Layer3.DstIP != f.Reply.Layer3.SrcIP { | ||
ft = fourTuple{ | ||
f.Reply.Layer3.DstIP, | ||
f.Reply.Layer3.SrcIP, | ||
uint16(f.Reply.Layer4.DstPort), | ||
uint16(f.Reply.Layer4.SrcPort), | ||
} | ||
} | ||
return ft | ||
} | ||
|
||
// ReportConnections calls trackers accordingly to the configuration. | ||
// When ebpf is enabled, only performEbpfTrack() is called | ||
func (t *connectionTracker) ReportConnections(rpt *report.Report) { | ||
hostNodeID := report.MakeHostNodeID(t.conf.HostID) | ||
|
||
if t.ebpfTracker != nil { | ||
t.performEbpfTrack(rpt, hostNodeID) | ||
return | ||
} | ||
|
||
// seenTuples contains information about connections seen by conntrack and it will be passed to the /proc parser | ||
seenTuples := map[string]fourTuple{} | ||
if t.flowWalker != nil { | ||
t.performFlowWalk(rpt, &seenTuples) | ||
} | ||
// if eBPF was enabled but failed to initialize, Scanner will be nil. | ||
// We can't recover from this, so don't walk proc in that case. | ||
// TODO: implement fallback | ||
if t.conf.WalkProc && t.conf.Scanner != nil { | ||
t.performWalkProc(rpt, hostNodeID, &seenTuples) | ||
} | ||
} | ||
|
||
func (t *connectionTracker) performFlowWalk(rpt *report.Report, seenTuples *map[string]fourTuple) { | ||
// Consult the flowWalker for short-lived connections | ||
extraNodeInfo := map[string]string{ | ||
Conntracked: "true", | ||
} | ||
t.flowWalker.walkFlows(func(f flow, alive bool) { | ||
tuple := flowToTuple(f) | ||
(*seenTuples)[tuple.key()] = tuple | ||
t.addConnection(rpt, tuple, "", extraNodeInfo, extraNodeInfo) | ||
}) | ||
} | ||
|
||
func (t *connectionTracker) performWalkProc(rpt *report.Report, hostNodeID string, seenTuples *map[string]fourTuple) error { | ||
conns, err := t.conf.Scanner.Connections(t.conf.SpyProcs) | ||
if err != nil { | ||
return err | ||
} | ||
for conn := conns.Next(); conn != nil; conn = conns.Next() { | ||
var ( | ||
namespaceID string | ||
tuple = fourTuple{ | ||
conn.LocalAddress.String(), | ||
conn.RemoteAddress.String(), | ||
conn.LocalPort, | ||
conn.RemotePort, | ||
} | ||
toNodeInfo = map[string]string{Procspied: "true"} | ||
fromNodeInfo = map[string]string{Procspied: "true"} | ||
) | ||
if conn.Proc.PID > 0 { | ||
fromNodeInfo[process.PID] = strconv.FormatUint(uint64(conn.Proc.PID), 10) | ||
fromNodeInfo[report.HostNodeID] = hostNodeID | ||
} | ||
|
||
if conn.Proc.NetNamespaceID > 0 { | ||
namespaceID = strconv.FormatUint(conn.Proc.NetNamespaceID, 10) | ||
} | ||
|
||
// If we've already seen this connection, we should know the direction | ||
// (or have already figured it out), so we normalize and use the | ||
// canonical direction. Otherwise, we can use a port-heuristic to guess | ||
// the direction. | ||
canonical, ok := (*seenTuples)[tuple.key()] | ||
if (ok && canonical != tuple) || (!ok && tuple.fromPort < tuple.toPort) { | ||
tuple.reverse() | ||
toNodeInfo, fromNodeInfo = fromNodeInfo, toNodeInfo | ||
} | ||
t.addConnection(rpt, tuple, namespaceID, fromNodeInfo, toNodeInfo) | ||
} | ||
return nil | ||
} | ||
|
||
func (t *connectionTracker) getInitialState() { | ||
scanner := procspy.NewSyncConnectionScanner(t.processCache) | ||
// Run conntrack and proc parsing synchronously only once to initialize ebpfTracker | ||
seenTuples := map[string]fourTuple{} | ||
// Consult the flowWalker to get the initial state | ||
if err := IsConntrackSupported(t.conf.ProcRoot); t.conf.UseConntrack && err != nil { | ||
log.Warnf("Not using conntrack: not supported by the kernel: %s", err) | ||
} else if existingFlows, err := existingConnections([]string{"--any-nat"}); err != nil { | ||
log.Errorf("conntrack existingConnections error: %v", err) | ||
} else { | ||
for _, f := range existingFlows { | ||
tuple := flowToTuple(f) | ||
seenTuples[tuple.key()] = tuple | ||
} | ||
} | ||
|
||
conns, err := scanner.Connections(t.conf.SpyProcs) | ||
if err != nil { | ||
log.Errorf("Error initializing ebpfTracker while scanning /proc, continuing without initial connections: %s", err) | ||
} | ||
scanner.Stop() | ||
|
||
t.ebpfTracker.feedInitialConnections(conns, seenTuples, report.MakeHostNodeID(t.conf.HostID)) | ||
} | ||
|
||
func (t *connectionTracker) performEbpfTrack(rpt *report.Report, hostNodeID string) error { | ||
t.ebpfTracker.walkConnections(func(e ebpfConnection) { | ||
fromNodeInfo := map[string]string{ | ||
EBPF: "true", | ||
} | ||
toNodeInfo := map[string]string{ | ||
EBPF: "true", | ||
} | ||
if e.pid > 0 { | ||
fromNodeInfo[process.PID] = strconv.Itoa(e.pid) | ||
fromNodeInfo[report.HostNodeID] = hostNodeID | ||
} | ||
|
||
if e.incoming { | ||
t.addConnection(rpt, reverse(e.tuple), e.networkNamespace, toNodeInfo, fromNodeInfo) | ||
} else { | ||
t.addConnection(rpt, e.tuple, e.networkNamespace, fromNodeInfo, toNodeInfo) | ||
} | ||
|
||
}) | ||
return nil | ||
} | ||
|
||
func (t *connectionTracker) addConnection(rpt *report.Report, ft fourTuple, namespaceID string, extraFromNode, extraToNode map[string]string) { | ||
var ( | ||
fromNode = t.makeEndpointNode(namespaceID, ft.fromAddr, ft.fromPort, extraFromNode) | ||
toNode = t.makeEndpointNode(namespaceID, ft.toAddr, ft.toPort, extraToNode) | ||
) | ||
rpt.Endpoint = rpt.Endpoint.AddNode(fromNode.WithEdge(toNode.ID, report.EdgeMetadata{})) | ||
rpt.Endpoint = rpt.Endpoint.AddNode(toNode) | ||
} | ||
|
||
func (t *connectionTracker) makeEndpointNode(namespaceID string, addr string, port uint16, extra map[string]string) report.Node { | ||
portStr := strconv.Itoa(int(port)) | ||
node := report.MakeNodeWith( | ||
report.MakeEndpointNodeID(t.conf.HostID, namespaceID, addr, portStr), | ||
map[string]string{Addr: addr, Port: portStr}) | ||
if names := t.conf.DNSSnooper.CachedNamesForIP(addr); len(names) > 0 { | ||
node = node.WithSet(SnoopedDNSNames, report.MakeStringSet(names...)) | ||
} | ||
if names, err := t.reverseResolver.get(addr); err == nil && len(names) > 0 { | ||
node = node.WithSet(ReverseDNSNames, report.MakeStringSet(names...)) | ||
} | ||
if extra != nil { | ||
node = node.WithLatests(extra) | ||
} | ||
return node | ||
} | ||
|
||
func (t *connectionTracker) Stop() error { | ||
if t.ebpfTracker != nil { | ||
t.ebpfTracker.stop() | ||
} | ||
if t.flowWalker != nil { | ||
t.flowWalker.stop() | ||
} | ||
t.reverseResolver.stop() | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.