From 980344ee242ccf286ae9e57ff0b13b782a5c5df5 Mon Sep 17 00:00:00 2001 From: Samuel Beaulieu Date: Mon, 25 Jul 2022 08:29:11 -0500 Subject: [PATCH] (bug) Prevent failing VMs from being retried infinitely (ondemand) Normally, when a VM fails the vm_ready? check, it is moved to the completed queue, which deletes it. In a pooled config, a new VM will be retried. For ondemand, we would also recreate the task to trigger the creation of a new VM. There was a bug where an ondemand request would be retried infinitely when vm_ready? always failed. We never checked the status of the request to see whether it had been deleted via the API or marked as failed because it had expired (over the ondemand_request_ttl limit). --- lib/vmpooler/pool_manager.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/vmpooler/pool_manager.rb b/lib/vmpooler/pool_manager.rb index 9ed29822..9c3f23c1 100644 --- a/lib/vmpooler/pool_manager.rb +++ b/lib/vmpooler/pool_manager.rb @@ -119,7 +119,13 @@ def fail_pending_vm(vm, pool, timeout, redis, exists: true) pool_alias = redis.hget("vmpooler__vm__#{vm}", 'pool_alias') if request_id redis.multi redis.smove("vmpooler__pending__#{pool}", "vmpooler__completed__#{pool}", vm) - redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool}:1:#{request_id}") if request_id + if request_id + ondemandrequest_hash = redis.hgetall("vmpooler__odrequest__#{request_id}") + if ondemandrequest_hash && ondemandrequest_hash['status'] != 'failed' && ondemandrequest_hash['status'] != 'deleted' + # will retry a VM that did not come up as vm_ready? only if it has not been marked failed or deleted + redis.zadd('vmpooler__odcreate__task', 1, "#{pool_alias}:#{pool}:1:#{request_id}") + end + end redis.exec $metrics.increment("errors.markedasfailed.#{pool}") $logger.log('d', "[!] [#{pool}] '#{vm}' marked as 'failed' after #{timeout} minutes")