Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NETOBSERV-1061: Add TCP drop and DNS tracking hooks #115

Merged
merged 4 commits into from
Jun 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions bpf/configs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@

#ifndef __CONFIGS_H__
#define __CONFIGS_H__

// Constant definitions, to be overridden by the invoker
volatile const u32 sampling = 0;
volatile const u8 trace_messages = 0;

#endif //__CONFIGS_H__
103 changes: 103 additions & 0 deletions bpf/dns_tracker.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
light weight DNS tracker using trace points.
*/

#ifndef __DNS_TRACKER_H__
#define __DNS_TRACKER_H__
#include "utils.h"

#define DNS_PORT 53
#define DNS_QR_FLAG 0x8000
#define UDP_MAXMSG 512

struct dns_header {
u16 id;
u16 flags;
u16 qdcount;
u16 ancount;
u16 nscount;
u16 arcount;
};

static inline void find_or_create_dns_flow(flow_id *id, struct dns_header *dns, int len, int dir, u16 flags) {
flow_metrics *aggregate_flow = bpf_map_lookup_elem(&aggregated_flows, id);
u64 current_time = bpf_ktime_get_ns();
// net_dev_queue trace point hook will run before TC hooks, so the flow shouldn't exists, if it does
// that indicates we have a stale DNS query/response or in the middle of TCP flow so we will do nothing
if (aggregate_flow == NULL) {
// there is no matching flows so lets create new one and add the drops
flow_metrics new_flow;
__builtin_memset(&new_flow, 0, sizeof(new_flow));
new_flow.start_mono_time_ts = current_time;
new_flow.end_mono_time_ts = current_time;
new_flow.packets = 1;
new_flow.bytes = len;
new_flow.flags = flags;
new_flow.dns_record.id = bpf_ntohs(dns->id);
new_flow.dns_record.flags = bpf_ntohs(dns->flags);
if (dir == EGRESS) {
new_flow.dns_record.req_mono_time_ts = current_time;
} else {
new_flow.dns_record.rsp_mono_time_ts = current_time;
}
bpf_map_update_elem(&aggregated_flows, id, &new_flow, BPF_ANY);
}
}

static inline int trace_dns(struct sk_buff *skb) {
flow_id id;
u8 protocol = 0;
u16 family = 0,flags = 0, len = 0;

__builtin_memset(&id, 0, sizeof(id));

id.if_index = skb->skb_iif;
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this will be set for a locally-generated packet (or to what value in this case)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we don't deal with locally generated packets we handle pkts that enter and exists the container interfaces

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How are you ensuring that?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the whole idea of netobserv agent its interface based feature , no interface set won't match any records in our maps and will be ignored


// read L2 info
set_key_with_l2_info(skb, &id, &family);

// read L3 info
set_key_with_l3_info(skb, family, &id, &protocol);

switch (protocol) {
case IPPROTO_UDP:
len = set_key_with_udp_info(skb, &id, IPPROTO_UDP);
// make sure udp payload doesn't exceed max msg size
if (len - sizeof(struct udphdr) > UDP_MAXMSG) {
return -1;
}
// set the length to udp hdr size as it will be used below to locate dns header
len = sizeof(struct udphdr);
break;
case IPPROTO_TCP:
len = set_key_with_tcp_info(skb, &id, IPPROTO_TCP, &flags);
break;
default:
return -1;
}

// check for DNS packets
if (id.dst_port == DNS_PORT || id.src_port == DNS_PORT) {
struct dns_header dns;
bpf_probe_read(&dns, sizeof(dns), (struct dns_header *)(skb->head + skb->transport_header + len));
msherif1234 marked this conversation as resolved.
Show resolved Hide resolved
if ((bpf_ntohs(dns.flags) & DNS_QR_FLAG) == 0) { /* dns query */
id.direction = EGRESS;
msherif1234 marked this conversation as resolved.
Show resolved Hide resolved
} else { /* dns response */
id.direction = INGRESS;
} // end of dns response
find_or_create_dns_flow(&id, &dns, skb->len, id.direction, flags);
} // end of dns port check

return 0;
}

SEC("tracepoint/net/net_dev_queue")
msherif1234 marked this conversation as resolved.
Show resolved Hide resolved
int trace_net_packets(struct trace_event_raw_net_dev_template *args) {
struct sk_buff skb;

__builtin_memset(&skb, 0, sizeof(skb));
bpf_probe_read(&skb, sizeof(struct sk_buff), args->skbaddr);
return trace_dns(&skb);
}

#endif // __DNS_TRACKER_H__
22 changes: 22 additions & 0 deletions bpf/flow.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ typedef __u16 u16;
typedef __u32 u32;
typedef __u64 u64;
msherif1234 marked this conversation as resolved.
Show resolved Hide resolved

#define AF_INET 2
#define AF_INET6 10
#define ETH_ALEN 6
#define ETH_P_IP 0x0800
#define ETH_P_IPV6 0x86DD
Expand All @@ -30,8 +32,24 @@ typedef struct flow_metrics_t {
// 0 otherwise
// https://chromium.googlesource.com/chromiumos/docs/+/master/constants/errnos.md
u8 errno;
struct tcp_drops_t {
msherif1234 marked this conversation as resolved.
Show resolved Hide resolved
u32 packets;
u64 bytes;
u16 latest_flags;
u8 latest_state;
u32 latest_drop_cause;
} __attribute__((packed)) tcp_drops;
struct dns_record_t {
u16 id;
u16 flags;
u64 req_mono_time_ts;
u64 rsp_mono_time_ts;
} __attribute__((packed)) dns_record;
} __attribute__((packed)) flow_metrics;

// Force emitting struct tcp_drops into the ELF.
const struct tcp_drops_t *unused0 __attribute__((unused));
msherif1234 marked this conversation as resolved.
Show resolved Hide resolved

// Force emitting struct flow_metrics into the ELF.
const struct flow_metrics_t *unused1 __attribute__((unused));

Expand Down Expand Up @@ -71,4 +89,8 @@ typedef struct flow_record_t {

// Force emitting struct flow_record into the ELF.
const struct flow_record_t *unused3 __attribute__((unused));

// Force emitting struct dns_record into the ELF.
const struct dns_record_t *unused4 __attribute__((unused));

#endif
223 changes: 4 additions & 219 deletions bpf/flows.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,226 +13,10 @@
until an entry is available.
4) When hash collision is detected, we send the new entry to userpace via ringbuffer.
*/
#include <vmlinux.h>
#include <bpf_helpers.h>
#include "utils.h"
#include "tcp_drops.h"
#include "dns_tracker.h"

#include "flow.h"
#define DISCARD 1
#define SUBMIT 0

// according to field 61 in https://www.iana.org/assignments/ipfix/ipfix.xhtml
#define INGRESS 0
#define EGRESS 1

// Flags according to RFC 9293 & https://www.iana.org/assignments/ipfix/ipfix.xhtml
#define FIN_FLAG 0x01
#define SYN_FLAG 0x02
#define RST_FLAG 0x04
#define PSH_FLAG 0x08
#define ACK_FLAG 0x10
#define URG_FLAG 0x20
#define ECE_FLAG 0x40
#define CWR_FLAG 0x80
// Custom flags exported
#define SYN_ACK_FLAG 0x100
#define FIN_ACK_FLAG 0x200
#define RST_ACK_FLAG 0x400

#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define bpf_ntohs(x) __builtin_bswap16(x)
#define bpf_htons(x) __builtin_bswap16(x)
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define bpf_ntohs(x) (x)
#define bpf_htons(x) (x)
#else
# error "Endianness detection needs to be set up for your compiler?!"
#endif

// Common Ringbuffer as a conduit for ingress/egress flows to userspace
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
__uint(max_entries, 1 << 24);
} direct_flows SEC(".maps");

// Key: the flow identifier. Value: the flow metrics for that identifier.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, flow_id);
__type(value, flow_metrics);
__uint(max_entries, 1 << 24);
__uint(map_flags, BPF_F_NO_PREALLOC);
} aggregated_flows SEC(".maps");

// Constant definitions, to be overridden by the invoker
volatile const u32 sampling = 0;
volatile const u8 trace_messages = 0;

const u8 ip4in6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};

// sets the TCP header flags for connection information
static inline void set_flags(struct tcphdr *th, u16 *flags) {
//If both ACK and SYN are set, then it is server -> client communication during 3-way handshake.
if (th->ack && th->syn) {
*flags |= SYN_ACK_FLAG;
} else if (th->ack && th->fin ) {
// If both ACK and FIN are set, then it is graceful termination from server.
*flags |= FIN_ACK_FLAG;
} else if (th->ack && th->rst ) {
// If both ACK and RST are set, then it is abrupt connection termination.
*flags |= RST_ACK_FLAG;
} else if (th->fin) {
*flags |= FIN_FLAG;
} else if (th->syn) {
*flags |= SYN_FLAG;
} else if (th->ack) {
*flags |= ACK_FLAG;
} else if (th->rst) {
*flags |= RST_FLAG;
} else if (th->psh) {
*flags |= PSH_FLAG;
} else if (th->urg) {
*flags |= URG_FLAG;
} else if (th->ece) {
*flags |= ECE_FLAG;
} else if (th->cwr) {
*flags |= CWR_FLAG;
}
}

// L4_info structure contains L4 headers parsed information.
struct l4_info_t {
// TCP/UDP/SCTP source port in host byte order
u16 src_port;
// TCP/UDP/SCTP destination port in host byte order
u16 dst_port;
// ICMPv4/ICMPv6 type value
u8 icmp_type;
// ICMPv4/ICMPv6 code value
u8 icmp_code;
// TCP flags
u16 flags;
};

// Extract L4 info for the supported protocols
static inline void fill_l4info(void *l4_hdr_start, void *data_end, u8 protocol,
struct l4_info_t *l4_info) {
switch (protocol) {
case IPPROTO_TCP: {
struct tcphdr *tcp = l4_hdr_start;
if ((void *)tcp + sizeof(*tcp) <= data_end) {
l4_info->src_port = bpf_ntohs(tcp->source);
l4_info->dst_port = bpf_ntohs(tcp->dest);
set_flags(tcp, &l4_info->flags);
}
} break;
case IPPROTO_UDP: {
struct udphdr *udp = l4_hdr_start;
if ((void *)udp + sizeof(*udp) <= data_end) {
l4_info->src_port = bpf_ntohs(udp->source);
l4_info->dst_port = bpf_ntohs(udp->dest);
}
} break;
case IPPROTO_SCTP: {
struct sctphdr *sctph = l4_hdr_start;
if ((void *)sctph + sizeof(*sctph) <= data_end) {
l4_info->src_port = bpf_ntohs(sctph->source);
l4_info->dst_port = bpf_ntohs(sctph->dest);
}
} break;
case IPPROTO_ICMP: {
struct icmphdr *icmph = l4_hdr_start;
if ((void *)icmph + sizeof(*icmph) <= data_end) {
l4_info->icmp_type = icmph->type;
l4_info->icmp_code = icmph->code;
}
} break;
case IPPROTO_ICMPV6: {
struct icmp6hdr *icmp6h = l4_hdr_start;
if ((void *)icmp6h + sizeof(*icmp6h) <= data_end) {
l4_info->icmp_type = icmp6h->icmp6_type;
l4_info->icmp_code = icmp6h->icmp6_code;
}
} break;
default:
break;
}
}

// sets flow fields from IPv4 header information
static inline int fill_iphdr(struct iphdr *ip, void *data_end, flow_id *id, u16 *flags) {
struct l4_info_t l4_info;
void *l4_hdr_start;

l4_hdr_start = (void *)ip + sizeof(*ip);
if (l4_hdr_start > data_end) {
return DISCARD;
}
__builtin_memset(&l4_info, 0, sizeof(l4_info));
__builtin_memcpy(id->src_ip, ip4in6, sizeof(ip4in6));
__builtin_memcpy(id->dst_ip, ip4in6, sizeof(ip4in6));
__builtin_memcpy(id->src_ip + sizeof(ip4in6), &ip->saddr, sizeof(ip->saddr));
__builtin_memcpy(id->dst_ip + sizeof(ip4in6), &ip->daddr, sizeof(ip->daddr));
id->transport_protocol = ip->protocol;
fill_l4info(l4_hdr_start, data_end, ip->protocol, &l4_info);
id->src_port = l4_info.src_port;
id->dst_port = l4_info.dst_port;
id->icmp_type = l4_info.icmp_type;
id->icmp_code = l4_info.icmp_code;
*flags = l4_info.flags;

return SUBMIT;
}

// sets flow fields from IPv6 header information
static inline int fill_ip6hdr(struct ipv6hdr *ip, void *data_end, flow_id *id, u16 *flags) {
struct l4_info_t l4_info;
void *l4_hdr_start;

l4_hdr_start = (void *)ip + sizeof(*ip);
if (l4_hdr_start > data_end) {
return DISCARD;
}
__builtin_memset(&l4_info, 0, sizeof(l4_info));
__builtin_memcpy(id->src_ip, ip->saddr.in6_u.u6_addr8, 16);
__builtin_memcpy(id->dst_ip, ip->daddr.in6_u.u6_addr8, 16);
id->transport_protocol = ip->nexthdr;
fill_l4info(l4_hdr_start, data_end, ip->nexthdr, &l4_info);
id->src_port = l4_info.src_port;
id->dst_port = l4_info.dst_port;
id->icmp_type = l4_info.icmp_type;
id->icmp_code = l4_info.icmp_code;
*flags = l4_info.flags;

return SUBMIT;
}
// sets flow fields from Ethernet header information
static inline int fill_ethhdr(struct ethhdr *eth, void *data_end, flow_id *id, u16 *flags) {
if ((void *)eth + sizeof(*eth) > data_end) {
return DISCARD;
}
__builtin_memcpy(id->dst_mac, eth->h_dest, ETH_ALEN);
__builtin_memcpy(id->src_mac, eth->h_source, ETH_ALEN);
id->eth_protocol = bpf_ntohs(eth->h_proto);

if (id->eth_protocol == ETH_P_IP) {
struct iphdr *ip = (void *)eth + sizeof(*eth);
return fill_iphdr(ip, data_end, id, flags);
} else if (id->eth_protocol == ETH_P_IPV6) {
struct ipv6hdr *ip6 = (void *)eth + sizeof(*eth);
return fill_ip6hdr(ip6, data_end, id, flags);
} else {
// TODO : Need to implement other specific ethertypes if needed
// For now other parts of flow id remain zero
__builtin_memset(&(id->src_ip), 0, sizeof(struct in6_addr));
__builtin_memset(&(id->dst_ip), 0, sizeof(struct in6_addr));
id->transport_protocol = 0;
id->src_port = 0;
id->dst_port = 0;
}
return SUBMIT;
}

static inline int flow_monitor(struct __sk_buff *skb, u8 direction) {
// If sampling is defined, will only parse 1 out of "sampling" flows
Expand Down Expand Up @@ -317,4 +101,5 @@ SEC("tc_egress")
int egress_flow_parse(struct __sk_buff *skb) {
return flow_monitor(skb, EGRESS);
}

char _license[] SEC("license") = "GPL";
Loading