Skip to content

Commit

Permalink
Merge pull request kubernetes#53 from Shopify/adding-monitoring-modules
Browse files Browse the repository at this point in the history
Emit response StatsD metrics about upstreams when dynamic configuration is enabled
  • Loading branch information
andrewloux authored May 25, 2018
2 parents a1be78c + fadcba2 commit 886d67f
Show file tree
Hide file tree
Showing 9 changed files with 407 additions and 36 deletions.
7 changes: 4 additions & 3 deletions rootfs/etc/nginx/lua/balancer/ewma.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

local resty_lock = require("resty.lock")
local util = require("util")
local split = require("util.split")

local DECAY_TIME = 10 -- this value is in seconds
local LOCK_KEY = ":ewma_key"
Expand Down Expand Up @@ -128,10 +129,10 @@ function _M.balance(backend)
end

function _M.after_balance()
local response_time = tonumber(util.get_first_value(ngx.var.upstream_response_time)) or 0
local connect_time = tonumber(util.get_first_value(ngx.var.upstream_connect_time)) or 0
local response_time = tonumber(split.get_first_value(ngx.var.upstream_response_time)) or 0
local connect_time = tonumber(split.get_first_value(ngx.var.upstream_connect_time)) or 0
local rtt = connect_time + response_time
local upstream = util.get_first_value(ngx.var.upstream_addr)
local upstream = split.get_first_value(ngx.var.upstream_addr)

if util.is_blank(upstream) then
return
Expand Down
3 changes: 2 additions & 1 deletion rootfs/etc/nginx/lua/balancer/resty.lua
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
local resty_roundrobin = require("resty.roundrobin")
local resty_chash = require("resty.chash")
local util = require("util")
local split = require("util.split")
local ck = require("resty.cookie")

local _M = {}
Expand Down Expand Up @@ -114,7 +115,7 @@ function _M.balance(backend)
endpoint_string = instance:find()
end

local address, port = util.split_pair(endpoint_string, ":")
local address, port = split.split_pair(endpoint_string, ":")
return { address = address, port = port }
end

Expand Down
78 changes: 78 additions & 0 deletions rootfs/etc/nginx/lua/monitor.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
local statsd = require('statsd')
local defer = require("util.defer")
local split = require("util.split")

local _M = {}

local function send_response_data(upstream_state, client_state)
local status_class
if upstream_state.status then
for i, status in ipairs(upstream_state.status) do
-- TODO: link this with the zones when we use openresty-upstream
if status == '-' then
status = 'ngx_error'
status_class = 'ngx_error'
else
status_class = string.sub(status, 0, 1) .. "xx"
end

ngx.log(ngx.INFO, upstream_state.addr[i] )
statsd.increment('ingress.nginx.upstream.response', 1, {
status=status,
status_class=status_class,
upstream_name=client_state.upstream_name
})

statsd.histogram('ingress.nginx.upstream.response_time',
upstream_state.response_time[i], {
upstream_name=client_state.upstream_name
})
end
end

status_class = string.sub(client_state.status, 0, 1) .. "xx"
statsd.increment('ingress.nginx.client.response', 1, {
status=client_state.status,
status_class=status_class,
upstream_name=client_state.upstream_name
})

statsd.histogram('ingress.nginx.client.request_time', client_state.request_time, {
upstream_name=client_state.upstream_name
})
end

function _M.call()
local status, status_err = split.split_upstream_var(ngx.var.upstream_status)
if status_err then
return nil, status_err
end

local addrs, addrs_err = split.split_upstream_addr(ngx.var.upstream_addr)
if addrs_err then
return nil, addrs_err
end

local response_time, rt_err = split.split_upstream_var(ngx.var.upstream_response_time)
if rt_err then
return nil, rt_err
end

local ok, err = defer.to_timer_phase(send_response_data, {
status=status,
addr=addrs,
response_time=response_time
}, {
status=ngx.var.status,
request_time=ngx.var.request_time,
upstream_name=ngx.var.proxy_upstream_name
})

if not ok then
local msg = "failed to send response data: " .. tostring(err)
ngx.log(ngx.ERR, msg)
return nil, msg
end
end

return _M
178 changes: 178 additions & 0 deletions rootfs/etc/nginx/lua/statsd.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
local pairs = pairs
local string_format = string.format
local string_len = string.len
local udp = ngx.socket.udp

local util = require("util")
local defer = require("util.defer")

local util_tablelength = util.tablelength

local _M = {}
local default_tag_string = "|#"

local METRIC_COUNTER = "c"
local METRIC_GAUGE = "g"
local METRIC_HISTOGRAM = "h"
local METRIC_SET = "s"
local MICROSECONDS = 1000000

local function create_udp_socket(host, port)
local sock, sock_err = udp()
if not sock then
return nil, sock_err
end

local ok, peer_err = sock:setpeername(host, port)
if not ok then
return nil, peer_err
end

return sock, nil
end

local function get_udp_socket(host, port)
local id = string_format("%s:%d", host, port)
local ctx = ngx.ctx.statsd_sockets
local err

if not ctx then
ctx = {}
ngx.ctx.statsd_sockets = ctx
end

local sock = ctx[id]
if not sock then
sock, err = create_udp_socket(host, port)
if not sock then
return nil, err
end

ctx[id] = sock
end

return sock, nil
end

local function generate_tag_string(tags)
if not tags or util_tablelength(tags) == 0 then
return ""
end

local tag_str = default_tag_string
for k,v in pairs(tags) do
if string_len(tag_str) > 2 then
tag_str = tag_str .. ","
end
tag_str = tag_str .. k .. ":" .. v
end

return tag_str
end

local function generate_packet(metric, key, value, tags, sampling_rate)
if sampling_rate == 1 then
sampling_rate = ""
else
sampling_rate = string_format("|@%g", sampling_rate)
end

return string_format("%s:%s|%s%s%s", key, tostring(value), metric, sampling_rate, generate_tag_string(tags))
end

local function metric(metric_type, key, value, tags, sample_rate)
if not value then
return nil, "no value passed"
end
if value == '-' then
return nil, nil -- don't pass an error to avoid logging to error log
end

if not _M.config then
return true, nil
end

local sampling_rate = sample_rate or _M.config.sampling_rate

if sampling_rate ~= 1 and math.random() > sampling_rate then
return nil, nil -- don't pass an error to avoid logging to error log
end

local packet = generate_packet(metric_type, key, value, tags, sampling_rate)

local sock, err = get_udp_socket(_M.config.host, _M.config.port)
if not sock then
return nil, err
end

return sock:send(packet)
end

local function send_metrics(...)
local ok, err = metric(...)
if not ok and err then
ngx.log(ngx.WARN, "failed logging to statsd: " .. tostring(err))
end
return ok, err
end

-- to avoid logging everywhere in #metric
local function log_metric(...)
local ok, err = defer.to_timer_phase(send_metrics, ...)
if not ok then
local msg = "failed to log metric: " .. tostring(err)
ngx.log(ngx.ERR, msg)
return nil, msg
end
return true
end

-- Statsd module level convenince functions

function _M.increment(key, value, tags, ...)
return log_metric(METRIC_COUNTER, key, value or 1, tags, ...)
end

function _M.gauge(key, value, tags, ...)
return log_metric(METRIC_GAUGE, key, value, tags, ...)
end

function _M.histogram(key, value, tags, ...)
return log_metric(METRIC_HISTOGRAM, key, value, tags, ...)
end

function _M.set(key, value, tags, ...)
return log_metric(METRIC_SET, key, value, tags, ...)
end

function _M.time(f)
local start_time = ngx.now()
local ret = { f() }
return ret, (ngx.now() - start_time) * MICROSECONDS
end

function _M.measure(key, f, tags)
local ret, time = _M.time(f)
_M.histogram(key, time, tags or {})
return unpack(ret)
end

_M.config = {
host = os.getenv("STATSD_HOST"),
port = os.getenv("STATSD_PORT"),
sampling_rate = 1.0,
tags = {
pod_id = os.getenv("POD_NAME"),
namespace = os.getenv("POD_NAMESPACE")
}
}

if not _M.config.host or not _M.config.port then
error("STATSD_HOST and STATSD_PORT env variables must be set")
end

if _M.config.tags then
default_tag_string = generate_tag_string(_M.config.tags)
end

return _M
38 changes: 6 additions & 32 deletions rootfs/etc/nginx/lua/util.lua
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,6 @@ function _M.lua_ngx_var(ngx_var)
return ngx.var[var_name]
end

function _M.split_pair(pair, seperator)
local i = pair:find(seperator)
if i == nil then
return pair, nil
else
local name = pair:sub(1, i - 1)
local value = pair:sub(i + 1, -1)
return name, value
end
end

-- this implementation is taken from
-- https://web.archive.org/web/20131225070434/http://snippets.luacode.org/snippets/Deep_Comparison_of_Two_Values_3
-- and modified for use in this project
Expand Down Expand Up @@ -76,28 +65,13 @@ function _M.is_blank(str)
return str == nil or string_len(str) == 0
end

-- http://nginx.org/en/docs/http/ngx_http_upstream_module.html#example
-- CAVEAT: nginx is giving out : instead of , so the docs are wrong
-- 127.0.0.1:26157 : 127.0.0.1:26157 , ngx.var.upstream_addr
-- 200 : 200 , ngx.var.upstream_status
-- 0.00 : 0.00, ngx.var.upstream_response_time
function _M.split_upstream_var(var)
if not var then
return nil, nil
-- statsd helpers
function _M.tablelength(T)
local count = 0
for _ in pairs(T) do
count = count + 1
end
local t = {}
for v in var:gmatch("[^%s|,]+") do
if v ~= ":" then
t[#t+1] = v
end
end
return t
end

function _M.get_first_value(var)
local t = _M.split_upstream_var(var) or {}
if #t == 0 then return nil end
return t[1]
return count
end

-- this implementation is taken from:
Expand Down
Loading

0 comments on commit 886d67f

Please sign in to comment.