netobserv · openshift-merge-robot · Jun 24, 2023 · Apr 26, 2023 · Apr 27, 2023 · Jun 2, 2023
diff --git a/bpf/configs.h b/bpf/configs.h
@@ -0,0 +1,9 @@
+
+#ifndef __CONFIGS_H__
+#define __CONFIGS_H__
+
+// Constant definitions, to be overridden by the invoker
+volatile const u32 sampling = 0;
+volatile const u8 trace_messages = 0;
+
+#endif //__CONFIGS_H__
diff --git a/bpf/dns_tracker.h b/bpf/dns_tracker.h
@@ -0,0 +1,103 @@
+/*
+    light weight DNS tracker using trace points.
+*/
+
+#ifndef __DNS_TRACKER_H__
+#define __DNS_TRACKER_H__
+#include "utils.h"
+
+#define DNS_PORT        53
+#define DNS_QR_FLAG     0x8000
+#define UDP_MAXMSG      512
+
+struct dns_header {
+    u16 id;
+    u16 flags;
+    u16 qdcount;
+    u16 ancount;
+    u16 nscount;
+    u16 arcount;
+};
+
+static inline void find_or_create_dns_flow(flow_id *id, struct dns_header *dns, int len, int dir, u16 flags) {
+    flow_metrics *aggregate_flow = bpf_map_lookup_elem(&aggregated_flows, id);
+    u64 current_time = bpf_ktime_get_ns();
+    // net_dev_queue trace point hook will run before TC hooks, so the flow shouldn't exists, if it does
+    // that indicates we have a stale DNS query/response or in the middle of TCP flow so we will do nothing
+    if (aggregate_flow == NULL) {
+        // there is no matching flows so lets create new one and add the drops
+         flow_metrics new_flow;
+         __builtin_memset(&new_flow, 0, sizeof(new_flow));
+         new_flow.start_mono_time_ts = current_time;
+         new_flow.end_mono_time_ts = current_time;
+         new_flow.packets = 1;
+         new_flow.bytes = len;
+         new_flow.flags = flags;
+         new_flow.dns_record.id = bpf_ntohs(dns->id);
+         new_flow.dns_record.flags = bpf_ntohs(dns->flags);
+        if (dir == EGRESS) {
+            new_flow.dns_record.req_mono_time_ts = current_time;
+        } else {
+            new_flow.dns_record.rsp_mono_time_ts = current_time;
+        }
+        bpf_map_update_elem(&aggregated_flows, id, &new_flow, BPF_ANY);
+    }
+}
+
+static inline int trace_dns(struct sk_buff *skb) {
+    flow_id id;
+    u8 protocol = 0;
+    u16 family = 0,flags = 0, len = 0;
+
+    __builtin_memset(&id, 0, sizeof(id));
+
+    id.if_index = skb->skb_iif;
+
+    // read L2 info
+    set_key_with_l2_info(skb, &id, &family);
+
+    // read L3 info
+    set_key_with_l3_info(skb, family, &id, &protocol);
+
+    switch (protocol) {
+    case IPPROTO_UDP:
+        len = set_key_with_udp_info(skb, &id, IPPROTO_UDP);
+        // make sure udp payload doesn't exceed max msg size
+        if (len - sizeof(struct udphdr) > UDP_MAXMSG) {
+            return -1;
+        }
+        // set the length to udp hdr size as it will be used below to locate dns header
+        len = sizeof(struct udphdr);
+        break;
+    case IPPROTO_TCP:
+        len = set_key_with_tcp_info(skb, &id, IPPROTO_TCP, &flags);
+        break;
+    default:
+        return -1;
+    }
+
+    // check for DNS packets
+    if (id.dst_port == DNS_PORT || id.src_port == DNS_PORT) {
+        struct dns_header dns;
+        bpf_probe_read(&dns, sizeof(dns), (struct dns_header *)(skb->head + skb->transport_header + len));
+        if ((bpf_ntohs(dns.flags) & DNS_QR_FLAG) == 0) { /* dns query */
+            id.direction = EGRESS;
+        } else { /* dns response */
+            id.direction = INGRESS;
+        } // end of dns response
+        find_or_create_dns_flow(&id, &dns, skb->len, id.direction, flags);
+    } // end of dns port check
+
+    return 0;
+}
+
+SEC("tracepoint/net/net_dev_queue")
+int trace_net_packets(struct trace_event_raw_net_dev_template *args) {
+    struct sk_buff skb;
+
+    __builtin_memset(&skb, 0, sizeof(skb));
+    bpf_probe_read(&skb, sizeof(struct sk_buff), args->skbaddr);
+    return trace_dns(&skb);
+}
+
+#endif // __DNS_TRACKER_H__
diff --git a/bpf/flow.h b/bpf/flow.h
@@ -10,6 +10,8 @@ typedef __u16 u16;
 typedef __u32 u32;
 typedef __u64 u64;
 
+#define AF_INET  2
+#define AF_INET6 10
 #define ETH_ALEN 6
 #define ETH_P_IP 0x0800
 #define ETH_P_IPV6 0x86DD
@@ -30,8 +32,24 @@ typedef struct flow_metrics_t {
     // 0 otherwise
     // https://chromium.googlesource.com/chromiumos/docs/+/master/constants/errnos.md
     u8 errno;
+    struct tcp_drops_t {
+        u32 packets;
+        u64 bytes;
+        u16 latest_flags;
+        u8 latest_state;
+        u32 latest_drop_cause;
+    } __attribute__((packed)) tcp_drops;
+    struct dns_record_t {
+        u16 id;
+        u16 flags;
+        u64 req_mono_time_ts;
+        u64 rsp_mono_time_ts;
+    } __attribute__((packed)) dns_record;
 } __attribute__((packed)) flow_metrics;
 
+// Force emitting struct tcp_drops into the ELF.
+const struct tcp_drops_t *unused0 __attribute__((unused));
+
 // Force emitting struct flow_metrics into the ELF.
 const struct flow_metrics_t *unused1 __attribute__((unused));
 
@@ -71,4 +89,8 @@ typedef struct flow_record_t {
 
 // Force emitting struct flow_record into the ELF.
 const struct flow_record_t *unused3 __attribute__((unused));
+
+// Force emitting struct dns_record into the ELF.
+const struct dns_record_t *unused4 __attribute__((unused));
+
 #endif
diff --git a/bpf/flows.c b/bpf/flows.c
@@ -13,226 +13,10 @@
             until an entry is available.
         4) When hash collision is detected, we send the new entry to userpace via ringbuffer.
 */
-#include <vmlinux.h>
-#include <bpf_helpers.h>
+#include "utils.h"
+#include "tcp_drops.h"
+#include "dns_tracker.h"
 
-#include "flow.h"
-#define DISCARD 1
-#define SUBMIT 0
-
-// according to field 61 in https://www.iana.org/assignments/ipfix/ipfix.xhtml
-#define INGRESS 0
-#define EGRESS 1
-
-// Flags according to RFC 9293 & https://www.iana.org/assignments/ipfix/ipfix.xhtml
-#define FIN_FLAG 0x01
-#define SYN_FLAG 0x02
-#define RST_FLAG 0x04
-#define PSH_FLAG 0x08
-#define ACK_FLAG 0x10
-#define URG_FLAG 0x20
-#define ECE_FLAG 0x40
-#define CWR_FLAG 0x80
-// Custom flags exported
-#define SYN_ACK_FLAG 0x100
-#define FIN_ACK_FLAG 0x200
-#define RST_ACK_FLAG 0x400
-
-#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
-	__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#define bpf_ntohs(x)		__builtin_bswap16(x)
-#define bpf_htons(x)		__builtin_bswap16(x)
-#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
-	__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-#define bpf_ntohs(x)		(x)
-#define bpf_htons(x)		(x)
-#else
-# error "Endianness detection needs to be set up for your compiler?!"
-#endif
-
-// Common Ringbuffer as a conduit for ingress/egress flows to userspace
-struct {
-    __uint(type, BPF_MAP_TYPE_RINGBUF);
-    __uint(max_entries, 1 << 24);
-} direct_flows SEC(".maps");
-
-// Key: the flow identifier. Value: the flow metrics for that identifier.
-struct {
-    __uint(type, BPF_MAP_TYPE_HASH);
-    __type(key, flow_id);
-    __type(value, flow_metrics);
-    __uint(max_entries, 1 << 24);
-    __uint(map_flags, BPF_F_NO_PREALLOC);
-} aggregated_flows SEC(".maps");
-
-// Constant definitions, to be overridden by the invoker
-volatile const u32 sampling = 0;
-volatile const u8 trace_messages = 0;
-
-const u8 ip4in6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
-
-// sets the TCP header flags for connection information
-static inline void set_flags(struct tcphdr *th, u16 *flags) {
-    //If both ACK and SYN are set, then it is server -> client communication during 3-way handshake. 
-    if (th->ack && th->syn) {
-        *flags |= SYN_ACK_FLAG;
-    } else if (th->ack && th->fin ) {
-        // If both ACK and FIN are set, then it is graceful termination from server.
-        *flags |= FIN_ACK_FLAG;
-    } else if (th->ack && th->rst ) {
-        // If both ACK and RST are set, then it is abrupt connection termination. 
-        *flags |= RST_ACK_FLAG;
-    } else if (th->fin) {
-        *flags |= FIN_FLAG;
-    } else if (th->syn) {
-        *flags |= SYN_FLAG;
-    } else if (th->ack) {
-        *flags |= ACK_FLAG;
-    } else if (th->rst) {
-        *flags |= RST_FLAG;
-    } else if (th->psh) {
-        *flags |= PSH_FLAG;
-    } else if (th->urg) {
-        *flags |= URG_FLAG;
-    } else if (th->ece) {
-        *flags |= ECE_FLAG;
-    } else if (th->cwr) {
-        *flags |= CWR_FLAG;
-    }
-}
-
-// L4_info structure contains L4 headers parsed information.
-struct l4_info_t {
-    // TCP/UDP/SCTP source port in host byte order
-    u16 src_port;
-    // TCP/UDP/SCTP destination port in host byte order
-    u16 dst_port;
-    // ICMPv4/ICMPv6 type value
-    u8 icmp_type;
-    // ICMPv4/ICMPv6 code value
-    u8 icmp_code;
-    // TCP flags
-    u16 flags;
-};
-
-// Extract L4 info for the supported protocols
-static inline void fill_l4info(void *l4_hdr_start, void *data_end, u8 protocol,
-                               struct l4_info_t *l4_info) {
-	switch (protocol) {
-    case IPPROTO_TCP: {
-        struct tcphdr *tcp = l4_hdr_start;
-        if ((void *)tcp + sizeof(*tcp) <= data_end) {
-            l4_info->src_port = bpf_ntohs(tcp->source);
-            l4_info->dst_port = bpf_ntohs(tcp->dest);
-            set_flags(tcp, &l4_info->flags);
-        }
-    } break;
-    case IPPROTO_UDP: {
-        struct udphdr *udp = l4_hdr_start;
-        if ((void *)udp + sizeof(*udp) <= data_end) {
-            l4_info->src_port = bpf_ntohs(udp->source);
-            l4_info->dst_port = bpf_ntohs(udp->dest);
-        }
-    } break;
-    case IPPROTO_SCTP: {
-        struct sctphdr *sctph = l4_hdr_start;
-        if ((void *)sctph + sizeof(*sctph) <= data_end) {
-            l4_info->src_port = bpf_ntohs(sctph->source);
-            l4_info->dst_port = bpf_ntohs(sctph->dest);
-        }
-    } break;
-    case IPPROTO_ICMP: {
-        struct icmphdr *icmph = l4_hdr_start;
-        if ((void *)icmph + sizeof(*icmph) <= data_end) {
-            l4_info->icmp_type = icmph->type;
-            l4_info->icmp_code = icmph->code;
-        }
-    } break;
-    case IPPROTO_ICMPV6: {
-        struct icmp6hdr *icmp6h = l4_hdr_start;
-         if ((void *)icmp6h + sizeof(*icmp6h) <= data_end) {
-            l4_info->icmp_type = icmp6h->icmp6_type;
-            l4_info->icmp_code = icmp6h->icmp6_code;
-        }
-    } break;
-    default:
-        break;
-    }
-}
-
-// sets flow fields from IPv4 header information
-static inline int fill_iphdr(struct iphdr *ip, void *data_end, flow_id *id, u16 *flags) {
-    struct l4_info_t l4_info;
-    void *l4_hdr_start;
-
-    l4_hdr_start = (void *)ip + sizeof(*ip);
-    if (l4_hdr_start > data_end) {
-        return DISCARD;
-    }
-    __builtin_memset(&l4_info, 0, sizeof(l4_info));
-    __builtin_memcpy(id->src_ip, ip4in6, sizeof(ip4in6));
-    __builtin_memcpy(id->dst_ip, ip4in6, sizeof(ip4in6));
-    __builtin_memcpy(id->src_ip + sizeof(ip4in6), &ip->saddr, sizeof(ip->saddr));
-    __builtin_memcpy(id->dst_ip + sizeof(ip4in6), &ip->daddr, sizeof(ip->daddr));
-    id->transport_protocol = ip->protocol;
-    fill_l4info(l4_hdr_start, data_end, ip->protocol, &l4_info);
-    id->src_port = l4_info.src_port;
-    id->dst_port = l4_info.dst_port;
-    id->icmp_type = l4_info.icmp_type;
-    id->icmp_code = l4_info.icmp_code;
-    *flags = l4_info.flags;
-
-    return SUBMIT;
-}
-
-// sets flow fields from IPv6 header information
-static inline int fill_ip6hdr(struct ipv6hdr *ip, void *data_end, flow_id *id, u16 *flags) {
-    struct l4_info_t l4_info;
-    void *l4_hdr_start;
-
-    l4_hdr_start = (void *)ip + sizeof(*ip);
-    if (l4_hdr_start > data_end) {
-        return DISCARD;
-    }
-    __builtin_memset(&l4_info, 0, sizeof(l4_info));
-    __builtin_memcpy(id->src_ip, ip->saddr.in6_u.u6_addr8, 16);
-    __builtin_memcpy(id->dst_ip, ip->daddr.in6_u.u6_addr8, 16);
-    id->transport_protocol = ip->nexthdr;
-    fill_l4info(l4_hdr_start, data_end, ip->nexthdr, &l4_info);
-    id->src_port = l4_info.src_port;
-    id->dst_port = l4_info.dst_port;
-    id->icmp_type = l4_info.icmp_type;
-    id->icmp_code = l4_info.icmp_code;
-    *flags = l4_info.flags;
-
-    return SUBMIT;
-}
-// sets flow fields from Ethernet header information
-static inline int fill_ethhdr(struct ethhdr *eth, void *data_end, flow_id *id, u16 *flags) {
-    if ((void *)eth + sizeof(*eth) > data_end) {
-        return DISCARD;
-    }
-    __builtin_memcpy(id->dst_mac, eth->h_dest, ETH_ALEN);
-    __builtin_memcpy(id->src_mac, eth->h_source, ETH_ALEN);
-    id->eth_protocol = bpf_ntohs(eth->h_proto);
-
-    if (id->eth_protocol == ETH_P_IP) {
-        struct iphdr *ip = (void *)eth + sizeof(*eth);
-        return fill_iphdr(ip, data_end, id, flags);
-    } else if (id->eth_protocol == ETH_P_IPV6) {
-        struct ipv6hdr *ip6 = (void *)eth + sizeof(*eth);
-        return fill_ip6hdr(ip6, data_end, id, flags);
-    } else {
-        // TODO : Need to implement other specific ethertypes if needed
-        // For now other parts of flow id remain zero
-        __builtin_memset(&(id->src_ip), 0, sizeof(struct in6_addr));
-        __builtin_memset(&(id->dst_ip), 0, sizeof(struct in6_addr));
-        id->transport_protocol = 0;
-        id->src_port = 0;
-        id->dst_port = 0;
-    }
-    return SUBMIT;
-}
 
 static inline int flow_monitor(struct __sk_buff *skb, u8 direction) {
     // If sampling is defined, will only parse 1 out of "sampling" flows
@@ -317,4 +101,5 @@ SEC("tc_egress")
 int egress_flow_parse(struct __sk_buff *skb) {
     return flow_monitor(skb, EGRESS);
 }
+
 char _license[] SEC("license") = "GPL";