diff --git a/probe/endpoint/dns_snooper_linux_amd64.go b/probe/endpoint/dns_snooper_linux_amd64.go new file mode 100644 index 0000000000..2c5b9dc760 --- /dev/null +++ b/probe/endpoint/dns_snooper_linux_amd64.go @@ -0,0 +1,208 @@ +package endpoint + +import ( + "bytes" + "fmt" + "math" + "time" + + log "github.com/Sirupsen/logrus" + "github.com/bluele/gcache" + "github.com/google/gopacket" + "github.com/google/gopacket/layers" + "github.com/google/gopacket/pcap" +) + +const ( + bufSize = 8 * 1024 * 1024 // 8MB + maxReverseDNSrecords = 10000 +) + +// DNSSnooper is a snopper of DNS queries +type DNSSnooper struct { + stop chan struct{} + pcapHandle *pcap.Handle + reverseDNSCache gcache.Cache +} + +// NewDNSSnooper creates a new snooper of DNS queries +func NewDNSSnooper() (*DNSSnooper, error) { + pcapHandle, err := newPcapHandle() + if err != nil { + return nil, err + } + reverseDNSCache := gcache.New(maxReverseDNSrecords).LRU().Build() + + s := &DNSSnooper{ + stop: make(chan struct{}), + pcapHandle: pcapHandle, + reverseDNSCache: reverseDNSCache, + } + go s.run() + return s, nil +} + +func newPcapHandle() (*pcap.Handle, error) { + inactive, err := pcap.NewInactiveHandle("any") + if err != nil { + return nil, err + } + defer inactive.CleanUp() + // pcap timeout blackmagic copied from Weave Net to reduce CPU consumption + // see https://github.com/weaveworks/weave/commit/025315363d5ea8b8265f1b3ea800f24df2be51a4 + if err = inactive.SetTimeout(time.Duration(math.MaxInt64)); err != nil { + return nil, err + } + if err = inactive.SetImmediateMode(true); err != nil { + // If gopacket is compiled against an older pcap.h that + // doesn't have pcap_set_immediate_mode, it supplies a dummy + // definition that always returns PCAP_ERROR. That becomes + // "Generic error", which is not very helpful. The real + // pcap_set_immediate_mode never returns PCAP_ERROR, so this + // turns it into a more informative message. + if fmt.Sprint(err) == "Generic error" { + return nil, fmt.Errorf("compiled against an old version of libpcap; please compile against libpcap-1.5.0 or later") + } + + return nil, err + } + if err = inactive.SetBufferSize(bufSize); err != nil { + return nil, err + } + pcapHandle, err := inactive.Activate() + if err != nil { + return nil, err + } + if err := pcapHandle.SetDirection(pcap.DirectionIn); err != nil { + pcapHandle.Close() + return nil, err + } + if err := pcapHandle.SetBPFFilter("inbound and port 53"); err != nil { + pcapHandle.Close() + return nil, err + } + + return pcapHandle, nil +} + +// CachedNamesForIP obtains the domains associated to an IP, +// obtained while snooping A-record queries +func (s *DNSSnooper) CachedNamesForIP(ip string) []string { + result := []string{} + if s == nil { + return result + } + domains, err := s.reverseDNSCache.Get(ip) + if err != nil { + return result + } + + for domain := range domains.(map[string]struct{}) { + result = append(result, domain) + } + + return result +} + +// Stop makes the snooper stop inspecting DNS communications +func (s *DNSSnooper) Stop() { + if s != nil { + close(s.stop) + } +} + +func (s *DNSSnooper) run() { + var ( + decodedLayers []gopacket.LayerType + dns layers.DNS + udp layers.UDP + tcp layers.TCP + ip4 layers.IPv4 + ip6 layers.IPv6 + eth layers.Ethernet + sll layers.LinuxSLL + ) + + // assumes that the "any" interface is being used (see https://wiki.wireshark.org/SLL) + packetParser := gopacket.NewDecodingLayerParser(layers.LayerTypeLinuxSLL, &sll, ð, &ip4, &ip6, &udp, &tcp, &dns) + + for { + select { + case <-s.stop: + s.pcapHandle.Close() + return + default: + } + + packet, _, err := s.pcapHandle.ZeroCopyReadPacketData() + if err != nil { + // TimeoutExpired is acceptable due to the Timeout black magic + // on the handle + if err != pcap.NextErrorTimeoutExpired { + log.Errorf("DNSSnooper: error reading packet data: %s", err) + } + continue + } + + if err := packetParser.DecodeLayers(packet, &decodedLayers); err != nil { + log.Errorf("DNSSnooper: error decoding packet: %s", err) + continue + } + + for _, layerType := range decodedLayers { + if layerType == layers.LayerTypeDNS { + s.processDNSMessage(&dns) + } + } + } +} + +func (s *DNSSnooper) processDNSMessage(dns *layers.DNS) { + + // Only consider responses to singleton, A-record questions + if !dns.QR || dns.ResponseCode != 0 || len(dns.Questions) != 1 { + return + } + question := dns.Questions[0] + if question.Type != layers.DNSTypeA || question.Class != layers.DNSClassIN { + return + } + + var ( + domainQueried = question.Name + records = append(dns.Answers, dns.Additionals...) + ips = map[string]struct{}{} + alias []byte + ) + + // Traverse records for a CNAME first since the DNS RFCs don't seem to guarantee it + // appearing before its A-records + for _, record := range records { + if record.Type == layers.DNSTypeCNAME && record.Class == layers.DNSClassIN && bytes.Equal(domainQueried, record.Name) { + alias = record.CNAME + break + } + } + + // Finally, get the answer + for _, record := range records { + if record.Type != layers.DNSTypeA || record.Class != layers.DNSClassIN { + continue + } + if bytes.Equal(domainQueried, record.Name) || (alias != nil && bytes.Equal(alias, record.Name)) { + ips[record.IP.String()] = struct{}{} + } + } + + // Update cache + newDomain := string(domainQueried) + log.Debugf("DNSSnooper: caught DNS lookup: %s -> %v", newDomain, ips) + for ip := range ips { + if existingDomains, err := s.reverseDNSCache.Get(ip); err != nil { + s.reverseDNSCache.Set(ip, map[string]struct{}{newDomain: {}}) + } else { + // TODO: Be smarter about the expiration of entries with pre-existing associated domains + existingDomains.(map[string]struct{})[newDomain] = struct{}{} + } + } +} diff --git a/probe/endpoint/dns_snooper_others.go b/probe/endpoint/dns_snooper_others.go new file mode 100644 index 0000000000..52b18cde47 --- /dev/null +++ b/probe/endpoint/dns_snooper_others.go @@ -0,0 +1,25 @@ +// +build darwin arm + +// Cross-compiling the snooper requires having pcap binaries, +// let's disable it for now. +// See http://stackoverflow.com/questions/31648793/go-programming-cross-compile-for-revel-framework + +package endpoint + +// DNSSnooper is a snopper of DNS queries +type DNSSnooper struct{} + +// NewDNSSnooper creates a new snooper of DNS queries +func NewDNSSnooper() (*DNSSnooper, error) { + return nil, nil +} + +// CachedNamesForIP obtains the domains associated to an IP, +// obtained while snooping A-record queries +func (s *DNSSnooper) CachedNamesForIP(ip string) []string { + return []string{} +} + +// Stop makes the snooper stop inspecting DNS communications +func (s *DNSSnooper) Stop() { +} diff --git a/probe/endpoint/reporter.go b/probe/endpoint/reporter.go index a9d61a6b9d..f357e6efd6 100644 --- a/probe/endpoint/reporter.go +++ b/probe/endpoint/reporter.go @@ -21,6 +21,7 @@ const ( Conntracked = "conntracked" Procspied = "procspied" ReverseDNSNames = "reverse_dns_names" + SnoopedDNSNames = "snooped_dns_names" ) // Reporter generates Reports containing the Endpoint topology. @@ -33,6 +34,7 @@ type Reporter struct { scanner procspy.ConnectionScanner natMapper natMapper reverseResolver *reverseResolver + dnsSnooper *DNSSnooper } // SpyDuration is an exported prometheus metric @@ -52,7 +54,7 @@ var SpyDuration = prometheus.NewSummaryVec( // on the host machine, at the granularity of host and port. That information // is stored in the Endpoint topology. It optionally enriches that topology // with process (PID) information. -func NewReporter(hostID, hostName string, spyProcs, useConntrack, walkProc bool, procRoot string, scanner procspy.ConnectionScanner) *Reporter { +func NewReporter(hostID, hostName string, spyProcs, useConntrack, walkProc bool, procRoot string, scanner procspy.ConnectionScanner, dnsSnooper *DNSSnooper) *Reporter { return &Reporter{ hostID: hostID, hostName: hostName, @@ -62,6 +64,7 @@ func NewReporter(hostID, hostName string, spyProcs, useConntrack, walkProc bool, natMapper: makeNATMapper(newConntrackFlowWalker(useConntrack, procRoot, "--any-nat")), reverseResolver: newReverseResolver(), scanner: scanner, + dnsSnooper: dnsSnooper, } } @@ -193,9 +196,10 @@ func (r *Reporter) makeEndpointNode(namespaceID string, addr string, port uint16 node := report.MakeNodeWith( report.MakeEndpointNodeID(r.hostID, namespaceID, addr, portStr), map[string]string{Addr: addr, Port: portStr}) - // In case we have a reverse resolution for the IP, we can use it for - // the name... - if names, err := r.reverseResolver.get(addr); err == nil { + if names := r.dnsSnooper.CachedNamesForIP(addr); len(names) > 0 { + node = node.WithSet(SnoopedDNSNames, report.MakeStringSet(names...)) + } + if names, err := r.reverseResolver.get(addr); err == nil && len(names) > 0 { node = node.WithSet(ReverseDNSNames, report.MakeStringSet(names...)) } if extra != nil { diff --git a/probe/endpoint/reporter_test.go b/probe/endpoint/reporter_test.go index e62307266b..7a6f4e948d 100644 --- a/probe/endpoint/reporter_test.go +++ b/probe/endpoint/reporter_test.go @@ -69,7 +69,7 @@ func TestSpyNoProcesses(t *testing.T) { ) scanner := procspy.FixedScanner(fixConnections) - reporter := endpoint.NewReporter(nodeID, nodeName, false, false, false, "", scanner) + reporter := endpoint.NewReporter(nodeID, nodeName, false, false, false, "", scanner, nil) r, _ := reporter.Report() //buf, _ := json.MarshalIndent(r, "", " ") //t.Logf("\n%s\n", buf) @@ -86,7 +86,7 @@ func TestSpyWithProcesses(t *testing.T) { ) scanner := procspy.FixedScanner(fixConnectionsWithProcesses) - reporter := endpoint.NewReporter(nodeID, nodeName, true, false, true, "", scanner) + reporter := endpoint.NewReporter(nodeID, nodeName, true, false, true, "", scanner, nil) r, _ := reporter.Report() // buf, _ := json.MarshalIndent(r, "", " ") ; t.Logf("\n%s\n", buf) diff --git a/prog/probe.go b/prog/probe.go index 8562a5d8d1..65f52ed5d3 100644 --- a/prog/probe.go +++ b/prog/probe.go @@ -145,7 +145,14 @@ func probeMain(flags probeFlags) { p.AddReporter(process.NewReporter(processCache, hostID, process.GetDeltaTotalJiffies)) } - endpointReporter := endpoint.NewReporter(hostID, hostName, flags.spyProcs, flags.useConntrack, flags.procEnabled, flags.procRoot, scanner) + dnsSnooper, err := endpoint.NewDNSSnooper() + if err != nil { + log.Errorf("Failed to start DNS snooper: nodes for external services will be less accurate: %s", err) + } else { + defer dnsSnooper.Stop() + } + + endpointReporter := endpoint.NewReporter(hostID, hostName, flags.spyProcs, flags.useConntrack, flags.procEnabled, flags.procRoot, scanner, dnsSnooper) defer endpointReporter.Stop() p.AddReporter(endpointReporter) diff --git a/render/container.go b/render/container.go index c4bab4fc50..eec54e8727 100644 --- a/render/container.go +++ b/render/container.go @@ -74,8 +74,8 @@ func ShortLivedConnectionJoin(r Renderer, toIPs func(report.Node) []string) Rend return report.Nodes{} } - // Propagate the internet pseudo node - if strings.HasSuffix(n.ID, TheInternetID) { + // Propagate the internet and service pseudo nodes + if strings.HasSuffix(n.ID, TheInternetID) || strings.HasPrefix(n.ID, ServiceNodeIDPrefix) { return report.Nodes{n.ID: n} } @@ -112,7 +112,7 @@ func ShortLivedConnectionJoin(r Renderer, toIPs func(report.Node) []string) Rend return report.Nodes{} } if ip := net.ParseIP(addr); ip != nil && !local.Contains(ip) { - node := theInternetNode(m) + node := externalNode(m) return report.Nodes{node.ID: node} } diff --git a/render/detailed/summary.go b/render/detailed/summary.go index ae95a39541..f88a62a077 100644 --- a/render/detailed/summary.go +++ b/render/detailed/summary.go @@ -115,6 +115,7 @@ func pseudoNodeSummary(base NodeSummary, n report.Node) (NodeSummary, bool) { base.Pseudo = true base.Rank = n.ID + // try rendering as an internet node if template, ok := templates[n.ID]; ok { base.Label = template.Label base.LabelMinor = template.LabelMinor @@ -122,6 +123,14 @@ func pseudoNodeSummary(base NodeSummary, n report.Node) (NodeSummary, bool) { return base, true } + // try rendering as a known service node + if strings.HasPrefix(n.ID, render.ServiceNodeIDPrefix) { + base.Label = n.ID[len(render.ServiceNodeIDPrefix):] + base.LabelMinor = "" + base.Shape = report.Cloud + return base, true + } + // try rendering it as an uncontained node if strings.HasPrefix(n.ID, render.MakePseudoNodeID(render.UncontainedID)) { base.Label = render.UncontainedMajor diff --git a/render/filters.go b/render/filters.go index 82e2fc2a1b..347ca36f35 100644 --- a/render/filters.go +++ b/render/filters.go @@ -245,9 +245,9 @@ func IsApplication(n report.Node) bool { var IsSystem = Complement(IsApplication) // IsNotPseudo returns true if the node is not a pseudo node -// or the internet nodes. +// or internet/service nodes. func IsNotPseudo(n report.Node) bool { - return n.Topology != Pseudo || strings.HasSuffix(n.ID, TheInternetID) + return n.Topology != Pseudo || strings.HasSuffix(n.ID, TheInternetID) || strings.HasPrefix(n.ID, ServiceNodeIDPrefix) } // IsNamespace checks if the node is a pod/service in the specified namespace diff --git a/render/process.go b/render/process.go index c756fa2c3a..7c8f14d18a 100644 --- a/render/process.go +++ b/render/process.go @@ -2,6 +2,7 @@ package render import ( "net" + "sort" "github.com/weaveworks/scope/probe/docker" "github.com/weaveworks/scope/probe/endpoint" @@ -96,8 +97,8 @@ func MapEndpoint2Pseudo(n report.Node, local report.Networks) report.Nodes { if ip := net.ParseIP(addr); ip != nil && !local.Contains(ip) { // If the dstNodeAddr is not in a network local to this report, we emit an - // internet node - node = theInternetNode(n) + // external pseudoNode + node = externalNode(n) } else { // due to https://github.com/weaveworks/scope/issues/1323 we are dropping // all non-internet pseudo nodes for now. @@ -157,7 +158,21 @@ func MapProcess2Name(n report.Node, _ report.Networks) report.Nodes { return report.Nodes{name: node} } -func theInternetNode(m report.Node) report.Node { +func externalNode(m report.Node) report.Node { + // First, check if it's a known service and emit a + // a specific node if it is + snoopedHostnames, _ := m.Sets.Lookup(endpoint.SnoopedDNSNames) + reverseHostnames, _ := m.Sets.Lookup(endpoint.ReverseDNSNames) + // Sort the names to make the lookup more deterministic + sort.StringSlice(snoopedHostnames).Sort() + sort.StringSlice(reverseHostnames).Sort() + // Intentionally prioritize snooped hostnames + for _, hostname := range append(snoopedHostnames, reverseHostnames...) { + if isKnownService(hostname) { + return NewDerivedPseudoNode(ServiceNodeIDPrefix+hostname, m) + } + } + // emit one internet node for incoming, one for outgoing if len(m.Adjacency) > 0 { return NewDerivedPseudoNode(IncomingInternetID, m) diff --git a/render/theinternet.go b/render/theinternet.go index 6a3c137399..9d6c7d3f64 100644 --- a/render/theinternet.go +++ b/render/theinternet.go @@ -2,11 +2,51 @@ package render import ( "net" + "regexp" "github.com/weaveworks/scope/probe/host" "github.com/weaveworks/scope/report" ) +var ( + // ServiceNodeIDPrefix is how the ID of all service pseudo nodes begin + ServiceNodeIDPrefix = "service-" + + knownServiceMatchers = []*regexp.Regexp{ + // See http://docs.aws.amazon.com/general/latest/gr/rande.html for fainer grained + // details + regexp.MustCompile(`^.+\.amazonaws\.com$`), + regexp.MustCompile(`^.+\.googleapis\.com$`), + } + + knownServiceExcluders = []*regexp.Regexp{ + // We exclude ec2 machines because they are too generic + // and having separate nodes for them makes visualizations worse + regexp.MustCompile(`^ec2.*\.amazonaws\.com$`), + } +) + +// TODO: Make it user-customizable https://github.com/weaveworks/scope/issues/1876 +func isKnownService(hostname string) bool { + foundMatch := false + for _, matcher := range knownServiceMatchers { + if matcher.MatchString(hostname) { + foundMatch = true + break + } + } + if !foundMatch { + return false + } + + for _, excluder := range knownServiceExcluders { + if excluder.MatchString(hostname) { + return false + } + } + return true +} + // LocalNetworks returns a superset of the networks (think: CIDRs) that are // "local" from the perspective of each host represented in the report. It's // used to determine which nodes in the report are "remote", i.e. outside of