Skip to content
This repository has been archived by the owner on Jul 9, 2024. It is now read-only.

Add ASG tag to control terminate instance or not #35

Merged
merged 1 commit into from
Apr 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Then, `kubectl apply -f deploy/mm.yaml`.
* "use-spot": This will make the minion-manager intelligently use spot instances in the ASG
* "no-spot": This will make the minion-manager always use on-demand instances in the ASG. This is useful when someone wants to temporarily switch to on-demand instances and at a later point switch to "use-spot"
* Note that after changing the tag value, it may take upto 5 minutes for the minion-manager pod to see the changes and make them take effect.
* The "k8s-minion-manager/not-terminate" tag can control ASG instance terminate by the minion-manager. If you want to control when to terminate ASG instances. You can set this tag to `true`. If not set or other value will disable this feature.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this tag for the ASG or for specific instances in the ASG? If the tag is for the ASG itself, better option would be to simply set the value of the k8s-minion-manager tag to no-spot. That way, the instances in the ASG will use on-demand instances and never switch (therefore, never terminate) instances.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to have chance to keep an instances running instead of auto terminated. When I use use-spot. And there have change MM changed launch config to on-demand. Then switch back to spot.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, the tag is for each instance, correct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it is set on ASG.
If change the k8s-minion-manager from use-spot to no-spot. Then MM will terminate all spot instance one by one. And vice versa.
So that I want to add a tag on ASG. Let MM know if this tag exist. Don't terminate instance for this ASG.
When that tag is removed or value changed. MM will handle that ASG instance termination.


**What happens when:**

Expand Down
18 changes: 16 additions & 2 deletions cloud_provider/aws/asg_mm.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
""" Metadata for each Autoscaling group in AWS. """
MINION_MANAGER_LABEL = 'k8s-minion-manager'
NOT_TERMINATE_LABEL = MINION_MANAGER_LABEL+'/not-terminate'


class AWSAutoscalinGroupMM(object):
Expand Down Expand Up @@ -30,9 +32,14 @@ def set_asg_info(self, asg_info):
assert asg_info is not None, "Can't set ASG info to None!"
self.asg_info = asg_info
for tag in self.asg_info['Tags']:
if tag.get('Key', None) == 'k8s-minion-manager':
if tag.get('Key', None) == MINION_MANAGER_LABEL:
if tag['Value'] not in ("use-spot", "no-spot"):
tag['Value'] = 'no-spot'
elif tag.get('Key', None) == NOT_TERMINATE_LABEL:
if tag['Value'] in ('true', 'True', 'TRUE'):
tag['Value'] = True
else:
tag['Value'] = False

def set_lc_info(self, lc_info):
""" Sets the lc_info. """
Expand Down Expand Up @@ -83,7 +90,7 @@ def get_instances(self):

def get_mm_tag(self):
for tag in self.asg_info['Tags']:
if tag.get('Key', None) == 'k8s-minion-manager':
if tag.get('Key', None) == MINION_MANAGER_LABEL:
return tag['Value'].lower()
return "no-spot"

Expand All @@ -108,3 +115,10 @@ def is_instance_running(self, instance):
return False

return True

def not_terminate_instance(self):
""" Retures is the ASG set not terminate instance """
for tag in self.asg_info['Tags']:
if tag.get('Key', None) == NOT_TERMINATE_LABEL:
return tag['Value']
return False
13 changes: 10 additions & 3 deletions cloud_provider/aws/aws_minion_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from cloud_provider.aws.price_info_reporter import AWSPriceReporter
from kubernetes import client, config
from ..base import MinionManagerBase
from .asg_mm import AWSAutoscalinGroupMM
from .asg_mm import AWSAutoscalinGroupMM, MINION_MANAGER_LABEL

logger = logging.getLogger("aws_minion_manager")
logging.basicConfig(format="%(asctime)s %(levelname)s %(name)s " +
Expand Down Expand Up @@ -111,7 +111,7 @@ def get_asgs_with_tags(cluster_name, ac_client):
if not is_candidate:
continue
for tag in r['Tags']:
if tag['Key'] == 'k8s-minion-manager':
if tag['Key'] == MINION_MANAGER_LABEL:
response["AutoScalingGroups"].append(r)
break
return bunchify(response)
Expand All @@ -123,7 +123,8 @@ def discover_asgs(self):
asg_mm = AWSAutoscalinGroupMM()
asg_mm.set_asg_info(asg)
self._asg_metas.append(asg_mm)
logger.info("Adding asg %s (%s)", asg_mm.get_name(), asg_mm.get_mm_tag())
logger.info("Adding asg %s (%s). Can manager terminate instance: %s", asg_mm.get_name(),
asg_mm.get_mm_tag(), "no " if asg_mm.not_terminate_instance() else "yes")

def populate_current_config(self):
"""
Expand Down Expand Up @@ -484,6 +485,12 @@ def schedule_instance_termination(self, asg_meta):
instances = asg_meta.get_instances()
if len(instances) == 0:
return

"""
Check is ASG set not to terminate instance
"""
if asg_meta.not_terminate_instance():
return

# If the ASG is configured to use "no-spot" or the required tag does not exist,
# do not schedule any instance termination.
Expand Down
59 changes: 48 additions & 11 deletions cloud_provider/aws/aws_minion_manager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def get_ami(self):

@mock_autoscaling
@mock_sts
def create_mock_asgs(self, minion_manager_tag="use-spot"):
def create_mock_asgs(self, minion_manager_tag="use-spot", not_terminate=False):
"""
Creates mocked AWS resources.
"""
Expand All @@ -67,30 +67,36 @@ def create_mock_asgs(self, minion_manager_tag="use-spot"):
KeyName='kubernetes-some-key')
resp = bunchify(response)
assert resp.ResponseMetadata.HTTPStatusCode == 200

asg_tags = [{'ResourceId': self.cluster_name_id,
'Key': 'KubernetesCluster', 'Value': self.cluster_name_id},
{'ResourceId': self.cluster_name_id,
'Key': 'k8s-minion-manager', 'Value': minion_manager_tag},
{'ResourceId': self.cluster_name_id,
'PropagateAtLaunch': True,
'Key': 'Name', 'Value': "my-instance-name"},
]

if not_terminate:
asg_tags.append({'ResourceId': self.cluster_name_id,
'Key': 'k8s-minion-manager/not-terminate', 'Value': 'True'})

response = self.autoscaling.create_auto_scaling_group(
AutoScalingGroupName=self.asg_name,
LaunchConfigurationName=self.lc_name, MinSize=3, MaxSize=3,
DesiredCapacity=3,
AvailabilityZones=['us-west-2a'],
Tags=[{'ResourceId': self.cluster_name_id,
'Key': 'KubernetesCluster', 'Value': self.cluster_name_id},
{'ResourceId': self.cluster_name_id,
'Key': 'k8s-minion-manager', 'Value': minion_manager_tag},
{'ResourceId': self.cluster_name_id,
'PropagateAtLaunch': True,
'Key': 'Name', 'Value': "my-instance-name"},
]
Tags=asg_tags
)
resp = bunchify(response)
assert resp.ResponseMetadata.HTTPStatusCode == 200

def basic_setup_and_test(self, minion_manager_tag="use-spot"):
def basic_setup_and_test(self, minion_manager_tag="use-spot", not_terminate=False):
"""
Creates the mock setup for tests, creates the aws_mm object and does
some basic sanity tests before returning it.
"""
self.create_mock_asgs(minion_manager_tag)
self.create_mock_asgs(minion_manager_tag, not_terminate)
aws_mm = AWSMinionManager(self.cluster_name_id, "us-west-2", refresh_interval_seconds=50)
assert len(aws_mm.get_asg_metas()) == 0, \
"ASG Metadata already populated?"
Expand Down Expand Up @@ -298,6 +304,37 @@ def _instance_termination_test_helper(minion_manager_tag, expected_kill_threads)
_instance_termination_test_helper("use-spot", 3)
_instance_termination_test_helper("no-spot", 0)
_instance_termination_test_helper("abcd", 0)

@mock_autoscaling
@mock_ec2
@mock_sts
def test_instance_not_termination(self):
"""
Tests that the AWSMinionManager won't terminate instance with not-terminate tag.
"""
def _instance_termination_test_helper(minion_manager_tag, expected_kill_threads):
awsmm = self.basic_setup_and_test(minion_manager_tag, True)
# Inject `k8s-minion-manager/not-terminate` to awsmm

assert len(awsmm.on_demand_kill_threads) == 0
asg_meta = awsmm.get_asg_metas()[0]
# Set instanceType since moto's instances don't have it.
instance_type = "m3.medium"
zone = "us-west-2b"
awsmm.bid_advisor.on_demand_price_dict[instance_type] = "100"
awsmm.bid_advisor.spot_price_list = [{'InstanceType': instance_type,
'SpotPrice': '80',
'AvailabilityZone': zone}]
for instance in asg_meta.get_instances():
instance.InstanceType = instance_type
awsmm.populate_instances(asg_meta)
awsmm.schedule_instance_termination(asg_meta)
assert len(awsmm.on_demand_kill_threads) == expected_kill_threads

time.sleep(15)
assert len(awsmm.on_demand_kill_threads) == 0

_instance_termination_test_helper("use-spot", 0)

# PriceReporter tests
@mock_autoscaling
Expand Down