From aa94d3dd502df8f2bd34cc3e170f3b67b05c75f6 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Mon, 26 Nov 2018 17:25:05 -0800 Subject: [PATCH] [autoscaler] Allow more than 5s from node creation to first heartbeat (#3385) --- python/ray/autoscaler/autoscaler.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/ray/autoscaler/autoscaler.py b/python/ray/autoscaler/autoscaler.py index 80245881cf78..9c4a452ee268 100644 --- a/python/ray/autoscaler/autoscaler.py +++ b/python/ray/autoscaler/autoscaler.py @@ -491,8 +491,10 @@ def files_up_to_date(self, node_id): def recover_if_needed(self, node_id): if not self.can_update(node_id): return - last_heartbeat_time = self.load_metrics.last_heartbeat_time_by_ip.get( - self.provider.internal_ip(node_id), 0) + key = self.provider.internal_ip(node_id) + if key not in self.load_metrics.last_heartbeat_time_by_ip: + self.load_metrics.last_heartbeat_time_by_ip[key] = time.time() + last_heartbeat_time = self.load_metrics.last_heartbeat_time_by_ip[key] delta = time.time() - last_heartbeat_time if delta < AUTOSCALER_HEARTBEAT_TIMEOUT_S: return