Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(healthcheck) lower the cleanup check frequency #116

Merged
merged 1 commit into from
Jul 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 32 additions & 23 deletions lib/resty/healthcheck.lua
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ local CHECK_JITTER = CHECK_INTERVAL * 0.1
-- the check interval. If it does not update the shm during this period, we
-- consider that it is not able to continue checking (the worker probably was killed)
local LOCK_PERIOD = CHECK_INTERVAL * 15
-- interval between stale targets cleanup
local CLEANUP_INTERVAL = CHECK_INTERVAL * 25

-- Counters: a 32-bit shm integer can hold up to four 8-bit counters.
local CTR_SUCCESS = 0x00000001
Expand Down Expand Up @@ -200,6 +202,7 @@ local hcs = setmetatable({}, {
})

local active_check_timer
local last_cleanup_check

-- TODO: improve serialization speed
-- serialize a table to a string
Expand Down Expand Up @@ -1630,6 +1633,7 @@ function _M.new(opts)
self:log(DEBUG, "worker ", ngx_worker_id(), " (pid: ", ngx_worker_pid(), ") ",
"starting active check timer")
local shm, key = self.shm, self.PERIODIC_LOCK
last_cleanup_check = ngx_now()
active_check_timer, err = resty_timer({
recurring = true,
interval = CHECK_INTERVAL,
Expand All @@ -1647,34 +1651,39 @@ function _M.new(opts)

local cur_time = ngx_now()
for _, checker_obj in pairs(hcs) do
-- clear targets marked for delayed removal
locking_target_list(checker_obj, function(target_list)
local removed_targets = {}
local index = 1
while index <= #target_list do
local target = target_list[index]
if target.purge_time and target.purge_time <= cur_time then
table_insert(removed_targets, target)
table_remove(target_list, index)
else
index = index + 1

if (last_cleanup_check + CLEANUP_INTERVAL) < cur_time then
-- clear targets marked for delayed removal
locking_target_list(checker_obj, function(target_list)
local removed_targets = {}
local index = 1
while index <= #target_list do
local target = target_list[index]
if target.purge_time and target.purge_time <= cur_time then
table_insert(removed_targets, target)
table_remove(target_list, index)
else
index = index + 1
end
end
end

if #removed_targets > 0 then
target_list = serialize(target_list)
if #removed_targets > 0 then
target_list = serialize(target_list)

local ok, err = shm:set(checker_obj.TARGET_LIST, target_list)
if not ok then
return nil, "failed to store target_list in shm: " .. err
end
local ok, err = shm:set(checker_obj.TARGET_LIST, target_list)
if not ok then
return nil, "failed to store target_list in shm: " .. err
end

for _, target in ipairs(removed_targets) do
clear_target_data_from_shm(checker_obj, target.ip, target.port, target.hostname)
checker_obj:raise_event(checker_obj.events.remove, target.ip, target.port, target.hostname)
for _, target in ipairs(removed_targets) do
clear_target_data_from_shm(checker_obj, target.ip, target.port, target.hostname)
checker_obj:raise_event(checker_obj.events.remove, target.ip, target.port, target.hostname)
end
end
end
end)
end)

last_cleanup_check = cur_time
end

if checker_obj.checks.active.healthy.active and
(checker_obj.checks.active.healthy.last_run +
Expand Down
4 changes: 2 additions & 2 deletions t/11-clear.t
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ unhealthy HTTP increment (3/3) for '(127.0.0.1:21120)'

local checker2 = healthcheck.new(config)
ngx.say(checker2:get_target_status("127.0.0.1", 10001))
ngx.sleep(0.4) -- wait while the targets are cleared
ngx.sleep(2.6) -- wait while the targets are cleared
local status, err = checker2:get_target_status("127.0.0.1", 10001)
if status ~= nil then
ngx.say(status)
Expand Down Expand Up @@ -251,7 +251,7 @@ target not found
local checker2 = healthcheck.new(config)
checker2:add_target("127.0.0.1", 10002, nil, true)
ngx.say(checker2:get_target_status("127.0.0.1", 10002))
ngx.sleep(0.4) -- wait while the targets would be cleared
ngx.sleep(2.6) -- wait while the targets would be cleared
local status, err = checker2:get_target_status("127.0.0.1", 10001)
if status ~= nil then
ngx.say(status)
Expand Down