Skip to content

Commit

Permalink
Add configuration option to ignore DNS RCODE 3 as errors (#566)
Browse files Browse the repository at this point in the history
Signed-off-by: YDMsama <ydmsama@gmail.com>
Signed-off-by: YDMsama <127646431+YDMsama@users.noreply.github.com>
  • Loading branch information
YDMsama authored Aug 30, 2023
1 parent c8b760e commit 410d242
Show file tree
Hide file tree
Showing 14 changed files with 94 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Expand the histogram bucket of otelexporter (Add 1500ms). ([#563](https://github.com/KindlingProject/kindling/pull/563))
- Set default values of `store_external_src_ip` and `StoreExternalSrcIP` to false to reduce occurrences of unexpected src IP data. ([#562](https://github.com/KindlingProject/kindling/pull/562))
- Optimized the `networkanalyzer` component of the probe analyzer by utilizing Go's goroutines, enabling concurrent execution. ([#558](https://github.com/KindlingProject/kindling/pull/558))
- Added a new configuration option `ignore_dns_rcode3_error` to allow users to specify whether DNS responses with RCODE 3 should be treated as errors. ([#566](https://github.com/KindlingProject/kindling/pull/566))
- Improved event processing efficiency with batch event retrieval in cgo. ([#560](https://github.com/KindlingProject/kindling/pull/560))

### Bug fixes
Expand Down
4 changes: 4 additions & 0 deletions collector/docker/kindling-collector-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ analyzers:
response_slow_threshold: 500
# Whether enable conntrack module to find pod's ip when calling service
enable_conntrack: true
# Whether to treat DNS responses with RCODE 3 (Name Error, i.e. NXDOMAIN) as normal responses instead of errors.
# Useful in Kubernetes clusters using the ClusterFirst DNS policy, where KubeDNS may return RCODE 3 for public domains.
# Set to true to stop reporting RCODE 3 as an error. The default is false.
ignore_dns_rcode3_error: false
conntrack_max_state_size: 131072
conntrack_rate_limit: 500
proc_root: /proc
Expand Down
2 changes: 2 additions & 0 deletions collector/internal/application/factory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ func TestConstructConfig(t *testing.T) {
},
},
UrlClusteringMethod: "blank",

IgnoreDnsRcode3Error: false,
}
assert.Equal(t, expectedNetworkConfig, networkConfig)
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ analyzers:
payload_length: 100
slow_threshold: 200
url_clustering_method: blank
# Whether to treat DNS responses with RCODE 3 (Name Error, i.e. NXDOMAIN) as normal responses instead of errors.
# Useful in Kubernetes clusters using the ClusterFirst DNS policy, where KubeDNS may return RCODE 3 for public domains.
# Set to true to stop reporting RCODE 3 as an error. The default is false.
ignore_dns_rcode3_error: true
processors:
k8smetadataprocessor:
# Set "enable" false if you want to run the agent in the non-Kubernetes environment.
Expand Down
2 changes: 2 additions & 0 deletions collector/pkg/component/analyzer/network/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type Config struct {
ResponseSlowThreshold int `mapstructure:"response_slow_threshold"`

EnableConntrack bool `mapstructure:"enable_conntrack"`
IgnoreDnsRcode3Error bool `mapstructure:"ignore_dns_rcode3_error"`
ConntrackMaxStateSize int `mapstructure:"conntrack_max_state_size"`
ConntrackRateLimit int `mapstructure:"conntrack_rate_limit"`
ProcRoot string `mapstructure:"proc_root"`
Expand All @@ -36,6 +37,7 @@ func NewDefaultConfig() *Config {
NoResponseThreshold: 120,
ResponseSlowThreshold: 500,
EnableConntrack: true,
IgnoreDnsRcode3Error: false,
ConntrackMaxStateSize: 131072,
ConntrackRateLimit: 500,
ProcRoot: "/proc",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func NewNetworkAnalyzer(cfg interface{}, telemetry *component.TelemetryTools, co
na.conntracker, _ = conntracker.NewConntracker(connConfig)
}

na.parserFactory = factory.NewParserFactory(factory.WithUrlClusteringMethod(na.cfg.UrlClusteringMethod))
na.parserFactory = factory.NewParserFactory(factory.WithUrlClusteringMethod(na.cfg.UrlClusteringMethod), factory.WithIgnoreDnsRcode3Error(na.cfg.IgnoreDnsRcode3Error))
na.snaplen = getSnaplenEnv()

return na
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ func TestDnsProtocol(t *testing.T) {
"dns/server-trace-multi.yml")
testProtocol(t, "dns/client-event.yml",
"dns/client-trace-sendmmg.yml")
testProtocol(t, "dns/client-event.yml",
"dns/client-trace-dns3.yml")
testProtocol(t, "dns/client-event-tcp.yml",
"dns/client-trace-tcp.yml")
}
Expand Down Expand Up @@ -134,7 +136,7 @@ func prepareNetworkAnalyzer() *NetworkAnalyzer {
protocol.SetPayLoadLength(config.Key, config.PayloadLength)
na.slowThresholdMap[config.Key] = config.Threshold
}
na.parserFactory = factory.NewParserFactory(factory.WithUrlClusteringMethod(na.cfg.UrlClusteringMethod))
na.parserFactory = factory.NewParserFactory(factory.WithUrlClusteringMethod(na.cfg.UrlClusteringMethod), factory.WithIgnoreDnsRcode3Error(na.cfg.IgnoreDnsRcode3Error))
na.snaplen = 200
// Do not start the timeout check; otherwise the test may fail
na.cfg.EnableTimeoutCheck = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,16 @@ https://www.rfc-editor.org/rfc/rfc1035
| Additional | RRs holding additional information
+---------------------+
*/
func NewTcpDnsParser() *protocol.ProtocolParser {
func NewTcpDnsParser(ignoreDnsRcode3Error bool) *protocol.ProtocolParser {
requestParser := protocol.CreatePkgParser(fastfailDnsRequest(), parseTcpDnsRequest())
responseParser := protocol.CreatePkgParser(fastfailDnsResponse(), parseTcpDnsResponse())
responseParser := protocol.CreatePkgParser(fastfailDnsResponse(), parseTcpDnsResponse(ignoreDnsRcode3Error))

return protocol.NewProtocolParser(protocol.DNS, requestParser, responseParser, dnsPair())
}

func NewUdpDnsParser() *protocol.ProtocolParser {
func NewUdpDnsParser(ignoreDnsRcode3Error bool) *protocol.ProtocolParser {
requestParser := protocol.CreatePkgParser(fastfailDnsRequest(), parseUdpDnsRequest())
responseParser := protocol.CreatePkgParser(fastfailDnsResponse(), parseUdpDnsResponse())
responseParser := protocol.CreatePkgParser(fastfailDnsResponse(), parseUdpDnsResponse(ignoreDnsRcode3Error))

return protocol.NewProtocolParser(protocol.DNS, requestParser, responseParser, nil)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ func fastfailDnsResponse() protocol.FastFailFn {
}
}

func parseTcpDnsResponse() protocol.ParsePkgFn {
func parseTcpDnsResponse(ignoreDnsRcode3Error bool) protocol.ParsePkgFn {
return func(message *protocol.PayloadMessage) (bool, bool) {
message.Offset += 2
return parseDnsResponse(message)
return parseDnsResponse(message, ignoreDnsRcode3Error)
}
}

func parseUdpDnsResponse() protocol.ParsePkgFn {
func parseUdpDnsResponse(ignoreDnsRcode3Error bool) protocol.ParsePkgFn {
return func(message *protocol.PayloadMessage) (bool, bool) {
return parseDnsResponse(message)
return parseDnsResponse(message, ignoreDnsRcode3Error)
}
}

Expand All @@ -50,7 +50,7 @@ Header format
| ARCOUNT |
+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
*/
func parseDnsResponse(message *protocol.PayloadMessage) (bool, bool) {
func parseDnsResponse(message *protocol.PayloadMessage, ignoreDnsRcode3Error bool) (bool, bool) {
offset := message.Offset
id, _ := message.ReadUInt16(offset)
flags, _ := message.ReadUInt16(offset + 2)
Expand All @@ -76,7 +76,7 @@ func parseDnsResponse(message *protocol.PayloadMessage) (bool, bool) {
}
message.AddIntAttribute(constlabels.DnsId, int64(id))
message.AddIntAttribute(constlabels.DnsRcode, int64(rcode))
if rcode > 0 {
if (rcode > 0 && rcode != 3) || (rcode == 3 && !ignoreDnsRcode3Error) {
message.AddBoolAttribute(constlabels.IsError, true)
message.AddIntAttribute(constlabels.ErrorType, int64(constlabels.ProtocolError))
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ package factory

// config collects the settings that Option functions apply when a
// ParserFactory is built.
type config struct {
// urlClusteringMethod is set through WithUrlClusteringMethod; newDefaultConfig
// initializes it to "alphabet".
urlClusteringMethod string
// ignoreDnsRcode3Error is set through WithIgnoreDnsRcode3Error and handed to
// the TCP and UDP DNS parsers. When true, a DNS response with RCODE 3 is not
// flagged as an error; newDefaultConfig initializes it to false.
ignoreDnsRcode3Error bool
}

// newDefaultConfig returns a freshly allocated config carrying the factory
// defaults: "alphabet" URL clustering, and DNS RCODE 3 responses still
// reported as errors.
func newDefaultConfig() *config {
	defaults := config{
		urlClusteringMethod:  "alphabet",
		ignoreDnsRcode3Error: false,
	}
	return &defaults
}

Expand All @@ -17,3 +19,9 @@ func WithUrlClusteringMethod(urlClusteringMethod string) Option {
cfg.urlClusteringMethod = urlClusteringMethod
}
}

// WithIgnoreDnsRcode3Error returns an Option that records whether DNS
// responses with RCODE 3 should be exempt from error marking. The value is
// stored on the factory config and later passed to the DNS parsers.
func WithIgnoreDnsRcode3Error(ignoreDnsRcode3Error bool) Option {
	apply := func(c *config) {
		c.ignoreDnsRcode3Error = ignoreDnsRcode3Error
	}
	return apply
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ func NewParserFactory(options ...Option) *ParserFactory {
factory.protocolParsers[protocol.MYSQL] = mysql.NewMysqlParser()
factory.protocolParsers[protocol.REDIS] = redis.NewRedisParser()
factory.protocolParsers[protocol.DUBBO] = dubbo.NewDubboParser()
factory.protocolParsers[protocol.DNS] = dns.NewTcpDnsParser()
factory.protocolParsers[protocol.DNS] = dns.NewTcpDnsParser(factory.config.ignoreDnsRcode3Error)
factory.protocolParsers[protocol.ROCKETMQ] = rocketmq.NewRocketMQParser()
factory.protocolParsers[protocol.NOSUPPORT] = generic.NewGenericParser()

factory.udpDnsParser = dns.NewUdpDnsParser()
factory.udpDnsParser = dns.NewUdpDnsParser(factory.config.ignoreDnsRcode3Error)
return factory
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
trace:
key: dns_rcode_3
requests:
- name: "sendmmsg"
timestamp: 100000000
user_attributes:
latency: 5000
res: 2
data:
- "hex|1f0000000901010000010000000000000377777705626169647503636f6d0000010001"
responses:
- name: "recvfrom"
timestamp: 101000000
user_attributes:
latency: 20000
res: 90
data:
- "hex|0901818300010003000000000377777705626169647503636f6d0000010001c00c00050001000002dc000f0377777701610673686966656ec016c02b000100010000007d0004b46532bcc02b000100010000007d0004b46532f2"
expects:
- Timestamp: 99995000
Values:
request_total_time: 1005000
connect_time: 0
request_sent_time: 5000
waiting_ttfb_time: 980000
content_download_time: 20000
request_io: 31
response_io: 90
Labels:
comm: "systemd-resolve"
pid: 577
request_tid: 577
response_tid: 577
src_ip: "127.0.0.1"
src_port: 60129
dst_ip: "127.0.0.53"
dst_port: 53
dnat_ip: ""
dnat_port: -1
container_id: ""
is_slow: false
is_server: false
protocol: "dns"
dns_rcode: 3
dns_ip: "180.101.50.188"
dns_id: 2305
dns_domain: "www.baidu.com."
is_error: false
error_type: 0
end_timestamp: 101000000
request_payload: ".............www.baidu.com....."
response_payload: ".............www.baidu.com..................www.a.shifen...+.......}...e2..+.......}...e2."
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ analyzers:
no_response_threshold: 120
response_slow_threshold: 500
enable_conntrack: false
ignore_dns_rcode3_error: true
conntrack_max_state_size: 131072
conntrack_rate_limit: 500
proc_root: /proc
Expand Down
4 changes: 4 additions & 0 deletions deploy/agent/kindling-collector-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ analyzers:
response_slow_threshold: 500
# Whether enable conntrack module to find pod's ip when calling service
enable_conntrack: true
# Whether to treat DNS responses with RCODE 3 (Name Error, i.e. NXDOMAIN) as normal responses instead of errors.
# Useful in Kubernetes clusters using the ClusterFirst DNS policy, where KubeDNS may return RCODE 3 for public domains.
# Set to true to stop reporting RCODE 3 as an error. The default is false.
ignore_dns_rcode3_error: false
conntrack_max_state_size: 131072
conntrack_rate_limit: 500
proc_root: /proc
Expand Down

0 comments on commit 410d242

Please sign in to comment.