From af002eb03892374eda4654f9218bb6f8f3ad9973 Mon Sep 17 00:00:00 2001 From: Connor Osborn Date: Thu, 29 Mar 2018 13:28:39 -0700 Subject: [PATCH] Fix unshelved instance failing to get fixed ip I changed the name of add_fixed_ip, because we are taking a quite different approach here. We delete all an instance's ports, and then instruct nova to attach the instance to a subnet. Nova takes care of creating a port. This fixes an outstanding issue in nova[0]. Our original approach is to keep ports around and then just update the fixed ips. However, when an instance and its port are created, they each have an availability zone, when we unshelve an instance its AZ may change. If it does change, openstack fails to update the device owner AZ of the port and attaching a fixed ip fails because no ports exist in the AZ. We also used to exit from add_fixed_ip if the instance already had a fixed ip. However, we can no longer do this. Instances affected by this shelving bug, will never have their fixed ips removed, so if we exited early. Instances that need a new port (on the new AZ) would not receive a new port. https://bugs.launchpad.net/nova/+bug/1759924[0] --- service/instance.py | 17 +++++------ service/tasks/driver.py | 67 +++++++++++++++-------------------------- 2 files changed, 33 insertions(+), 51 deletions(-) diff --git a/service/instance.py b/service/instance.py index 6b907547b..7a9016721 100644 --- a/service/instance.py +++ b/service/instance.py @@ -470,7 +470,7 @@ def restore_ip_chain(esh_driver, esh_instance, deploy=True, start with: task.apply_async() """ from service.tasks.driver import ( - wait_for_instance, add_fixed_ip, add_floating_ip, + wait_for_instance, restore_port, add_floating_ip, deploy_init_to, update_metadata ) init_task = wait_for_instance.s( @@ -478,20 +478,19 @@ def restore_ip_chain(esh_driver, esh_instance, deploy=True, esh_driver.identity, "active", # TODO: DELETEME below. no_tasks=True) - # Step 1: Set metadata to initializing + metadata = {'tmp_status': 'initializing'} metadata_update_task = update_metadata.si( esh_driver.__class__, esh_driver.provider, esh_driver.identity, esh_instance.id, metadata, replace_metadata=False) - # Step 2: Add fixed - fixed_ip_task = add_fixed_ip.si( + restore_port_task = restore_port.si( esh_driver.__class__, esh_driver.provider, esh_driver.identity, esh_instance.id, core_identity_uuid) init_task.link(metadata_update_task) - metadata_update_task.link(fixed_ip_task) - # Add float and re-deploy OR just add floating IP... + metadata_update_task.link(restore_port_task) + if deploy: core_identity = CoreIdentity.objects.get(uuid=core_identity_uuid) deploy_task = deploy_init_to.si( @@ -501,16 +500,16 @@ def restore_ip_chain(esh_driver, esh_instance, deploy=True, esh_instance.id, core_identity, redeploy=True) - fixed_ip_task.link(deploy_task) + restore_port_task.link(deploy_task) else: - logger.info("Skip deployment, Add floating IP only") floating_ip_task = add_floating_ip.si( esh_driver.__class__, esh_driver.provider, esh_driver.identity, str(core_identity_uuid), esh_instance.id) - fixed_ip_task.link(floating_ip_task) + restore_port_task.link(floating_ip_task) + return init_task diff --git a/service/tasks/driver.py b/service/tasks/driver.py index 22c956c37..cc22ce83b 100644 --- a/service/tasks/driver.py +++ b/service/tasks/driver.py @@ -168,40 +168,35 @@ def _is_instance_ready(instance, status_query, return instance.id return True - -@task(name="add_fixed_ip", +@task(name="restore_port", ignore_result=True, default_retry_delay=15, - max_retries=15) -def add_fixed_ip( - driverCls, - provider, - identity, - instance_id, - core_identity_uuid=None): - from service import instance as instance_service + max_retries=5, + bind=True) +def restore_port(self, + driverCls, + provider, + identity, + instance_id, + core_identity_uuid=None): + from service.instance import _to_network_driver, _get_network_id try: - celery_logger.debug("add_fixed_ip task started at %s." % datetime.now()) driver = get_driver(driverCls, provider, identity) + core_identity = Identity.objects.get(uuid=core_identity_uuid) + network_driver = _to_network_driver(core_identity) instance = driver.get_instance(instance_id) - if not instance: - celery_logger.debug("Instance has been teminated: %s." % instance_id) - return None - if instance._node.private_ips: - # TODO: Attempt to rescue - celery_logger.info("Instance has fixed IP: %s" % instance_id) - return instance - - network_id = instance_service._get_network_id(driver, instance) - fixed_ip = driver._connection.ex_add_fixed_ip(instance, network_id) - celery_logger.debug("add_fixed_ip task finished at %s." % datetime.now()) - return fixed_ip - except Exception as exc: - if "Not Ready" not in str(exc): - # Ignore 'normal' errors. - celery_logger.exception(exc) - add_fixed_ip.retry(exc=exc) + assert instance, "Instance {} no longer exists".format(instance_id) + + ports = network_driver.list_ports(device_id=instance.id) + for port in ports: + network_driver.delete_port(port) + network_id = _get_network_id(driver, instance) + driver._connection.ex_attach_interface( + instance.id, network_id=network_id) + except Exception as exc: + celery_logger.exception(exc) + self.retry(exc=exc) def current_openstack_identities(): identities = Identity.objects.filter( @@ -1197,20 +1192,8 @@ def add_floating_ip(driverCls, provider, identity, core_identity_uuid, countdown = min(2**current.request.retries, 128) add_floating_ip.retry(exc=floating_ip_err, countdown=countdown) - except NeutronBadRequest as bad_request: - # NOTE: 'Neutron Bad Request' is a good message to 'catch and fix' - # because its a user-supplied problem. - # Here we will attempt to 'fix' requests and put the 'add_floating_ip' - # task back on the queue after we're done. - celery_logger.exception("Neutron did not accept request - %s." - % bad_request.message) - if 'no fixed ip' in bad_request.message.lower(): - fixed_ip = add_fixed_ip(driverCls, provider, identity, - instance_alias) - if fixed_ip: - celery_logger.debug("Fixed IP %s has been added to Instance %s." - % (fixed_ip, instance_alias)) - # let the exception bubble-up for a retry.. + except NeutronBadRequest: + # This is an error on our end, we want it to surface raise except (BaseException, Exception) as exc: celery_logger.exception("Error occurred while assigning a floating IP")