Skip to content

Commit

Permalink
Merge pull request #113 from github/xdp-stats
Browse files Browse the repository at this point in the history
glb-director-xdp: Add statsd metrics.
  • Loading branch information
theojulienne authored Sep 16, 2020
2 parents aa1ae91 + ccd9f9f commit 448e7d3
Show file tree
Hide file tree
Showing 9 changed files with 277 additions and 21 deletions.
1 change: 1 addition & 0 deletions script/cibuild
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ script/cibuild-prepare
script/test-glb-redirect
script/test-glb-healthcheck
script/test-glb-director
script/test-glb-director-xdp
67 changes: 53 additions & 14 deletions src/glb-director-xdp/bpf/glb_encap.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <string.h>

#include "bpf_helpers.h"
#include "glb_stats.h"

#include <glb-hashing/glb_gue.h>
#include <glb-hashing/pdnet.h>
Expand Down Expand Up @@ -95,6 +96,14 @@ struct bpf_map_def SEC("maps") glb_table_secrets = {
.max_entries = 4096,
};

/* Global packet counters for the XDP program.
 *
 * A per-CPU array map holding a single `struct glb_global_stats` value. The XDP
 * program looks the entry up with key 0 and increments its fields; userspace
 * reads the same key to collect the per-CPU values.
 */
struct bpf_map_def SEC("maps") glb_global_packet_counters = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(uint32_t),
.value_size = sizeof(struct glb_global_stats),
/* Only one element is needed, but the PERCPU_* map types are all multi-element
 * containers, so a one-entry array is used.
 */
.max_entries = 1,
};

static __always_inline uint16_t compute_ipv4_checksum(void *iph) {
uint16_t *iph16 = (uint16_t *)iph;

Expand All @@ -117,7 +126,7 @@ static __always_inline uint16_t compute_ipv4_checksum(void *iph) {
* Expects that `eth_hdr` points to ROUTE_CONTEXT_ENCAP_SIZE(ctx) bytes of free space
* before the inner/original IP packet header begins.
*/
static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth_hdr, void *data_end, glb_route_context *route_context)
static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth_hdr, void *data_end, glb_route_context *route_context, struct glb_global_stats *g_stats)
{
if (route_context == NULL)
return XDP_DROP;
Expand All @@ -136,8 +145,10 @@ static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth

uint32_t config_bit = 0;
struct pdnet_mac_addr *gw_mac = (struct pdnet_mac_addr *)bpf_map_lookup_elem(&config_bits, &config_bit);
if (gw_mac == NULL)
if (gw_mac == NULL) {
g_stats->ErrorMissingGatewayMAC++;
return XDP_DROP;
}
eth_hdr->src_addr = route_context->orig_dst_mac;
eth_hdr->dst_addr = *gw_mac;
eth_hdr->ether_type = htons(PDNET_ETHER_TYPE_IPV4);
Expand All @@ -149,8 +160,10 @@ static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth

config_bit = 1;
uint32_t *src_ip = (uint32_t *)bpf_map_lookup_elem(&config_bits, &config_bit);
if (src_ip == NULL)
if (src_ip == NULL) {
g_stats->ErrorMissingSourceAddress++;
return XDP_DROP;
}

glb_bpf_printk(" src_ip: 0x%x\n", *src_ip);

Expand Down Expand Up @@ -224,6 +237,7 @@ static __always_inline int glb_encapsulate_packet(struct pdnet_ethernet_hdr *eth

glb_bpf_printk(" encaped!\n");

g_stats->Encapsulated++;
return XDP_TX;
}

Expand All @@ -233,6 +247,11 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

// cat /sys/kernel/debug/tracing/trace_pipe
glb_bpf_printk("Greetings\n");

uint32_t stat = 0;
struct glb_global_stats *g_stats = bpf_map_lookup_elem(&glb_global_packet_counters, &stat);
if (g_stats == NULL) return XDP_PASS; /* this should always succeed, but we must bail if not for eBPF verifier */
g_stats->Processed++;

int rc = XDP_PASS;

Expand All @@ -242,8 +261,10 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {
route_context.packet_end = data_end;
rc = glb_extract_packet_fields(&route_context);
glb_bpf_printk(" parse rc = %d\n", rc);
if (rc != 0)
if (rc != 0) {
g_stats->UnknownFormat++;
return XDP_PASS;
}

glb_bpf_printk(" dst_addr: 0x%x\n", route_context.dst_addr.ipv4);
glb_bpf_printk(" src_addr: 0x%x\n", route_context.src_addr.ipv4);
Expand All @@ -264,26 +285,36 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {
glb_bpf_printk(" bind proto: 0x%x\n", bind.proto);

uint32_t *table_id_ptr = (uint32_t *)bpf_map_lookup_elem(&glb_binds, &bind);
if (table_id_ptr == NULL)
if (table_id_ptr == NULL) {
g_stats->NoMatchingBind++;
return XDP_PASS;
}

g_stats->Matched++;

uint32_t table_id = *table_id_ptr;
glb_bpf_printk(" bind maps to table id: %d\n", table_id);

struct bpf_map_def *table = (struct bpf_map_def *)bpf_map_lookup_elem(&glb_tables, &table_id);
glb_bpf_printk(" bind maps to table fd: 0x%p\n", table);
if (table == NULL)
if (table == NULL) {
g_stats->ErrorTable++;
return XDP_PASS; // we don't know
}

uint8_t *secret = (uint8_t *)bpf_map_lookup_elem(&glb_table_secrets, &table_id);
glb_bpf_printk(" table secret: 0x%p\n", secret);
if (secret == NULL)
if (secret == NULL) {
g_stats->ErrorSecret++;
return XDP_PASS; // we don't have a valid secret, bail
}

uint32_t config_bit = 3;
glb_director_hash_fields *hf_cfg_ptr = (glb_director_hash_fields *)bpf_map_lookup_elem(&config_bits, &config_bit);
if (hf_cfg_ptr == NULL)
if (hf_cfg_ptr == NULL) {
g_stats->ErrorHashConfig++;
return XDP_PASS;
}

// glb_bpf_printk(" dst_addr: 0x%x\n", route_context.dst_addr.ipv4);
// glb_bpf_printk(" src_addr: 0x%x\n", route_context.src_addr.ipv4);
Expand All @@ -298,8 +329,10 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

glb_bpf_printk(" which is tableRow %d: 0x%p\n", tableRowIndex, tableRow);

if (tableRow == NULL)
if (tableRow == NULL) {
g_stats->ErrorMissingRow++;
return XDP_PASS; // we don't know
}

glb_bpf_printk(" table primary: %d\n", tableRow[0]);
glb_bpf_printk(" table secondary: %d\n", tableRow[1]);
Expand All @@ -313,8 +346,10 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

config_bit = 4;
glb_director_hash_fields *hf_cfg_alt_ptr = (glb_director_hash_fields *)bpf_map_lookup_elem(&config_bits, &config_bit);
if (hf_cfg_alt_ptr == NULL)
if (hf_cfg_alt_ptr == NULL) {
g_stats->ErrorHashConfig++;
return XDP_PASS;
}

if (hf_cfg_alt_ptr->dst_addr || hf_cfg_alt_ptr->dst_port || hf_cfg_alt_ptr->src_addr || hf_cfg_alt_ptr->src_port) {
uint64_t hash = glb_compute_hash(&route_context, secret, hf_cfg_alt_ptr);
Expand All @@ -325,8 +360,10 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

glb_bpf_printk(" which is tableRow (alt) %d: 0x%p\n", tableRowIndex, tableRow);

if (tableRow == NULL)
if (tableRow == NULL) {
g_stats->ErrorMissingRow++;
return XDP_PASS; // we don't know
}

glb_bpf_printk(" table (alt) primary: %d\n", tableRow[0]);
glb_bpf_printk(" table (alt) secondary: %d\n", tableRow[1]);
Expand All @@ -340,16 +377,18 @@ static __always_inline int xdp_glb_director_process(struct xdp_md *ctx) {

// encapsulate!
// we want to essentially remove (add to our start) an eth and add (subtract from our start) all the bits we need.
if (bpf_xdp_adjust_head(ctx, (int)sizeof(struct pdnet_ethernet_hdr) - (int)ROUTE_CONTEXT_ENCAP_SIZE(&route_context)))
if (bpf_xdp_adjust_head(ctx, (int)sizeof(struct pdnet_ethernet_hdr) - (int)ROUTE_CONTEXT_ENCAP_SIZE(&route_context))) {
g_stats->ErrorCreatingSpace++;
return XDP_DROP;
}

/* these must be retrieved again after the adjust_head */
data = (void*)(long)ctx->data;
data_end = (void *)(long)ctx->data_end;
if (data + ROUTE_CONTEXT_ENCAP_SIZE(&route_context) > data_end)
if (data + ROUTE_CONTEXT_ENCAP_SIZE(&route_context) > data_end) /* this is just to let the compiler know we checked for safety */
return XDP_DROP;

return glb_encapsulate_packet(data, data_end, &route_context);
return glb_encapsulate_packet(data, data_end, &route_context, g_stats);
}

SEC("xdp/xdp_glb_director")
Expand Down
45 changes: 45 additions & 0 deletions src/glb-director-xdp/bpf/glb_stats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#ifndef _GLB_STATS_H_
#define _GLB_STATS_H_

/* Pull in uint64_t ourselves so this header compiles regardless of what the
 * including file happened to include before it.
 */
#include <stdint.h>

/* Global packet counters shared between the XDP program and the Go userspace.
 *
 * NOTE: these fields follow Go naming conventions, because they are an interface with
 * cgo which needs the fields to be capitalised to be exported and binary-unmarshal-able.
 *
 * The struct is read as raw bytes on the Go side, so the order and width of the
 * fields are part of the interface: do not reorder or resize existing fields.
 */
typedef struct glb_global_stats {
	/* The number of packets entering the XDP pipeline. */
	uint64_t Processed;
	/* The number of packets that couldn't be parsed, meaning they weren't one of the
	 * protocols we know how to parse. This isn't always an error, since we listen
	 * for any packets on the host.
	 */
	uint64_t UnknownFormat;
	/* The number of packets that we could successfully parse, but that then didn't
	 * match a bind. This is also expected in production.
	 */
	uint64_t NoMatchingBind;
	/* The number of processed packets that matched a bind and should be included in
	 * the table stats.
	 */
	uint64_t Matched;

	/* The number of packets that made it all the way through to encapsulation and
	 * transmit.
	 */
	uint64_t Encapsulated;

	/* The errors below are unexpected, and we generally expect none of them to occur.
	 * They might be useful to debug why the system isn't behaving as expected.
	 */

	/* Internal Error: Reference of a table that we then couldn't look up. */
	uint64_t ErrorTable;
	/* Internal Error: Reference of a table with no hashing secret. */
	uint64_t ErrorSecret;
	/* Internal Error: The hash field configuration couldn't be retrieved. */
	uint64_t ErrorHashConfig;
	/* Internal Error: We looked up a table, but the table didn't have a row where we
	 * expected one.
	 */
	uint64_t ErrorMissingRow;
	/* Internal Error: We tried to create space to encapsulate the packet (at the
	 * front), but this failed.
	 */
	uint64_t ErrorCreatingSpace;
	/* Internal Error: The outbound gateway MAC address could not be read from
	 * configuration.
	 */
	uint64_t ErrorMissingGatewayMAC;
	/* Internal Error: The local machine's source IP address could not be read from
	 * configuration.
	 */
	uint64_t ErrorMissingSourceAddress;
} glb_global_stats;

#endif
1 change: 1 addition & 0 deletions src/glb-director-xdp/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/github/glb-director/src/glb-director-xdp
go 1.14

require (
github.com/DataDog/datadog-go v4.0.0+incompatible // indirect
github.com/cilium/ebpf v0.0.0-20200901135951-4048cd641690
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf
github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815
Expand Down
2 changes: 2 additions & 0 deletions src/glb-director-xdp/go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
github.com/DataDog/datadog-go v4.0.0+incompatible h1:Dq8Dr+4sV1gBO1sHDWdW+4G+PdsA+YSJOK925MxrrCY=
github.com/DataDog/datadog-go v4.0.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/cilium/ebpf v0.0.0-20200901135951-4048cd641690 h1:GpWvisoWNFzz/RB0fMgyYUcFmF4G5MlYD9uUHoofayo=
github.com/cilium/ebpf v0.0.0-20200901135951-4048cd641690/go.mod h1:7cR51M8ViRLIdUjrmSXlK9pkrsDlLHbO8jiB8X8JnOc=
github.com/coreos/go-systemd v0.0.0-20191104093116-d3cd4ed1dbcf h1:iW4rZ826su+pqaw19uhpSCzhj44qo35pNgKFGqzDKkU=
Expand Down
85 changes: 84 additions & 1 deletion src/glb-director-xdp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
package main

import (
"github.com/DataDog/datadog-go/statsd"
"github.com/cilium/ebpf"
"github.com/coreos/go-systemd/daemon"
"github.com/docopt/docopt-go"
Expand All @@ -58,6 +59,7 @@ import (
#include <stdint.h>
#include "../glb-hashing/pdnet.h"
#include "bpf/glb_stats.h"
// meh
#include <sys/resource.h>
Expand Down Expand Up @@ -155,6 +157,8 @@ type GLBDirectorConfig struct {
DstPort bool `json:"dst_port"`
} `json:"alt_hash_fields"`

StatsdPort uint16 `json:"statsd_port"`

// unused by XDP version: num_worker_queues, flow_paths, lcores
}

Expand All @@ -180,6 +184,8 @@ type Application struct {
TableSpec *ebpf.MapSpec

ForwardingTablePath string

StatsClient *statsd.Client
}

func boolToC(a bool) C.uchar {
Expand Down Expand Up @@ -342,6 +348,82 @@ func (app *Application) ReloadForwardingTable() {
}
}

// InitStatsCollection starts statsd reporting when a statsd port is configured.
//
// With StatsdPort set to 0 this is a no-op. Otherwise it creates a statsd client
// for 127.0.0.1 on that port, sets its namespace and engine tag, stores it on
// the Application, and starts the background goroutine that reports the global
// packet counters. Failing to create the client or to find the counters map is
// fatal.
func (app *Application) InitStatsCollection() {
	port := app.Config.StatsdPort
	if port == 0 {
		// stats collection is disabled
		return
	}

	statsClient, err := statsd.New(fmt.Sprintf("127.0.0.1:%d", port))
	if err != nil {
		log.Fatal(err)
	}
	statsClient.Namespace = "glb.director."
	statsClient.Tags = append(statsClient.Tags, "glb_engine:xdp")
	app.StatsClient = statsClient

	counters := app.Collection.Maps["glb_global_packet_counters"]
	if counters == nil {
		log.Fatal("Could not load map glb_global_packet_counters")
	}

	go app.runStatsCollection(counters)
}

// diffAndSumGlobalStats returns, for every counter, the total increase between
// two per-CPU snapshots: the sum over CPUs of (curr - last).
//
// last and curr are indexed by CPU. Only the indexes present in both slices are
// considered, so a snapshot that is shorter than the other cannot cause an
// out-of-range panic.
func diffAndSumGlobalStats(last []C.glb_global_stats, curr []C.glb_global_stats) C.glb_global_stats {
	sum := C.glb_global_stats{}

	// Bound the loop by the shorter snapshot; both slices are indexed below.
	cpuCount := len(last)
	if len(curr) < cpuCount {
		cpuCount = len(curr)
	}

	for cpuIndex := 0; cpuIndex < cpuCount; cpuIndex++ {
		prev, now := &last[cpuIndex], &curr[cpuIndex]

		sum.Processed += now.Processed - prev.Processed
		sum.Encapsulated += now.Encapsulated - prev.Encapsulated

		sum.UnknownFormat += now.UnknownFormat - prev.UnknownFormat
		sum.NoMatchingBind += now.NoMatchingBind - prev.NoMatchingBind
		sum.Matched += now.Matched - prev.Matched

		sum.ErrorTable += now.ErrorTable - prev.ErrorTable
		sum.ErrorSecret += now.ErrorSecret - prev.ErrorSecret
		sum.ErrorHashConfig += now.ErrorHashConfig - prev.ErrorHashConfig
		sum.ErrorMissingRow += now.ErrorMissingRow - prev.ErrorMissingRow
		sum.ErrorCreatingSpace += now.ErrorCreatingSpace - prev.ErrorCreatingSpace
		sum.ErrorMissingGatewayMAC += now.ErrorMissingGatewayMAC - prev.ErrorMissingGatewayMAC
		sum.ErrorMissingSourceAddress += now.ErrorMissingSourceAddress - prev.ErrorMissingSourceAddress
	}

	return sum
}

// runStatsCollection reports the global packet counters to statsd every 10
// seconds. It never returns and is meant to run in its own goroutine.
//
// On each tick it reads the per-CPU counters from globalCounters (key 0), sums
// the increase since the previous successful read, and emits the deltas as
// statsd counts. A failed read is logged and skipped; the previous snapshot is
// kept so the next successful read still reports the full delta.
func (app *Application) runStatsCollection(globalCounters *ebpf.Map) {
	var lastGlobalValues []C.struct_glb_global_stats
	// grab the data at the start, this will ensure that at the first tick we can immediately emit data.
	if err := globalCounters.Lookup(uint32(0), &lastGlobalValues); err != nil {
		log.Printf("Could not read initial glb_global_packet_counters: %v", err)
		// Don't diff against a snapshot we couldn't read; the first tick will seed it.
		lastGlobalValues = nil
	}

	for range time.Tick(10 * time.Second) {
		var globalValues []C.struct_glb_global_stats

		if err := globalCounters.Lookup(uint32(0), &globalValues); err != nil {
			log.Printf("Could not read glb_global_packet_counters: %v", err)
			continue
		}

		if len(lastGlobalValues) > 0 {
			sum := diffAndSumGlobalStats(lastGlobalValues, globalValues)

			// Emitted in this order; names and tags are part of the metrics interface.
			counts := []struct {
				name  string
				value int64
				tags  []string
			}{
				{"packets.processed", int64(sum.Processed), nil},
				{"packets.encapsulated", int64(sum.Encapsulated), nil},

				{"packets.results", int64(sum.UnknownFormat), []string{"result:UnknownFormat"}},
				{"packets.results", int64(sum.NoMatchingBind), []string{"result:NoMatchingBind"}},
				{"packets.results", int64(sum.Matched), []string{"result:Matched"}},

				{"packets.internal_errors", int64(sum.ErrorTable), []string{"error:ErrorTable"}},
				{"packets.internal_errors", int64(sum.ErrorSecret), []string{"error:ErrorSecret"}},
				{"packets.internal_errors", int64(sum.ErrorHashConfig), []string{"error:ErrorHashConfig"}},
				{"packets.internal_errors", int64(sum.ErrorMissingRow), []string{"error:ErrorMissingRow"}},
				{"packets.internal_errors", int64(sum.ErrorCreatingSpace), []string{"error:ErrorCreatingSpace"}},
				{"packets.internal_errors", int64(sum.ErrorMissingGatewayMAC), []string{"error:ErrorMissingGatewayMAC"}},
				{"packets.internal_errors", int64(sum.ErrorMissingSourceAddress), []string{"error:ErrorMissingSourceAddress"}},
			}
			for _, c := range counts {
				// Sending is best-effort; a failed send is not retried.
				app.StatsClient.Count(c.name, c.value, c.tags, 1)
			}
		}

		lastGlobalValues = globalValues
	}
}

func gracefullReloadByExec() {
fmt.Printf("Reloading by exec-ing a new version of glb-director-xdp\n")

Expand Down Expand Up @@ -372,7 +454,7 @@ func gracefullReloadByExec() {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
cmd.Env = append(os.Environ(),
"NOTIFY_SOCKET=" + readySock,
"NOTIFY_SOCKET="+readySock,
)

err = cmd.Start()
Expand Down Expand Up @@ -506,6 +588,7 @@ func main() {

// load up our entire config/forwarding table before we attach
// this makes the attach itself the atomic cut-over between reloads.
app.InitStatsCollection()
app.SyncConfigMap()
app.ReloadForwardingTable()

Expand Down
2 changes: 1 addition & 1 deletion src/glb-director/packaging/version.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
GLB_DIRECTOR_VERSION="1.0.6"
GLB_DIRECTOR_VERSION="1.0.7"
Loading

0 comments on commit 448e7d3

Please sign in to comment.