Skip to content
This repository has been archived by the owner on Sep 12, 2022. It is now read-only.

Commit

Permalink
Fix unshelved instance failing to get fixed ip
Browse files Browse the repository at this point in the history
I changed the name of add_fixed_ip, because we are taking a quite different
approach here. We delete all an instance's ports, and then instruct nova to
attach the instance to a subnet. Nova takes care of creating a port.

This fixes an outstanding issue in nova[0]. Our original approach is to keep
ports around and then just update the fixed ips. However, when an instance and
its port are created, they each have an availability zone, when we unshelve an
instance its AZ may change. If it does change, openstack fails to update the
device owner AZ of the port and attaching a fixed ip fails because no ports
exist in the AZ.

We also used to exit from add_fixed_ip if the instance already had a fixed ip.
However, we can no longer do this. Instances affected by this shelving bug,
will never have their fixed ips removed, so if we exited early. Instances that
need a new port (on the new AZ) would not receive a new port.

https://bugs.launchpad.net/nova/+bug/1759924[0]
  • Loading branch information
cdosborn committed Mar 29, 2018
1 parent 3636a1b commit 5130640
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 51 deletions.
17 changes: 8 additions & 9 deletions service/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,28 +470,27 @@ def restore_ip_chain(esh_driver, esh_instance, deploy=True,
start with: task.apply_async()
"""
from service.tasks.driver import (
wait_for_instance, add_fixed_ip, add_floating_ip,
wait_for_instance, restore_port, add_floating_ip,
deploy_init_to, update_metadata
)
init_task = wait_for_instance.s(
esh_instance.id, esh_driver.__class__, esh_driver.provider,
esh_driver.identity, "active",
# TODO: DELETEME below.
no_tasks=True)
# Step 1: Set metadata to initializing

metadata = {'tmp_status': 'initializing'}
metadata_update_task = update_metadata.si(
esh_driver.__class__, esh_driver.provider, esh_driver.identity,
esh_instance.id, metadata, replace_metadata=False)

# Step 2: Add fixed
fixed_ip_task = add_fixed_ip.si(
restore_port_task = restore_port.si(
esh_driver.__class__, esh_driver.provider,
esh_driver.identity, esh_instance.id, core_identity_uuid)

init_task.link(metadata_update_task)
metadata_update_task.link(fixed_ip_task)
# Add float and re-deploy OR just add floating IP...
metadata_update_task.link(restore_port_task)

if deploy:
core_identity = CoreIdentity.objects.get(uuid=core_identity_uuid)
deploy_task = deploy_init_to.si(
Expand All @@ -501,16 +500,16 @@ def restore_ip_chain(esh_driver, esh_instance, deploy=True,
esh_instance.id,
core_identity,
redeploy=True)
fixed_ip_task.link(deploy_task)
restore_port_task.link(deploy_task)
else:
logger.info("Skip deployment, Add floating IP only")
floating_ip_task = add_floating_ip.si(
esh_driver.__class__,
esh_driver.provider,
esh_driver.identity,
str(core_identity_uuid),
esh_instance.id)
fixed_ip_task.link(floating_ip_task)
restore_port_task.link(floating_ip_task)

return init_task


Expand Down
67 changes: 25 additions & 42 deletions service/tasks/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,40 +168,35 @@ def _is_instance_ready(instance, status_query,
return instance.id
return True


@task(name="add_fixed_ip",
@task(name="restore_port",
ignore_result=True,
default_retry_delay=15,
max_retries=15)
def add_fixed_ip(
driverCls,
provider,
identity,
instance_id,
core_identity_uuid=None):
from service import instance as instance_service
max_retries=5,
bind=True)
def restore_port(self,
driverCls,
provider,
identity,
instance_id,
core_identity_uuid=None):
from service.instance import _to_network_driver, _get_network_id
try:
celery_logger.debug("add_fixed_ip task started at %s." % datetime.now())
driver = get_driver(driverCls, provider, identity)
core_identity = Identity.objects.get(uuid=core_identity_uuid)
network_driver = _to_network_driver(core_identity)
instance = driver.get_instance(instance_id)
if not instance:
celery_logger.debug("Instance has been teminated: %s." % instance_id)
return None
if instance._node.private_ips:
# TODO: Attempt to rescue
celery_logger.info("Instance has fixed IP: %s" % instance_id)
return instance

network_id = instance_service._get_network_id(driver, instance)
fixed_ip = driver._connection.ex_add_fixed_ip(instance, network_id)
celery_logger.debug("add_fixed_ip task finished at %s." % datetime.now())
return fixed_ip
except Exception as exc:
if "Not Ready" not in str(exc):
# Ignore 'normal' errors.
celery_logger.exception(exc)
add_fixed_ip.retry(exc=exc)
assert instance, "Instance {} no longer exists".format(instance_id)

ports = network_driver.list_ports(device_id=instance.id)
for port in ports:
network_driver.delete_port(port)

network_id = _get_network_id(driver, instance)
driver._connection.ex_attach_interface(
instance.id, network_id=network_id)
except Exception as exc:
celery_logger.exception(exc)
self.retry(exc=exc)

def current_openstack_identities():
identities = Identity.objects.filter(
Expand Down Expand Up @@ -1197,20 +1192,8 @@ def add_floating_ip(driverCls, provider, identity, core_identity_uuid,
countdown = min(2**current.request.retries, 128)
add_floating_ip.retry(exc=floating_ip_err,
countdown=countdown)
except NeutronBadRequest as bad_request:
# NOTE: 'Neutron Bad Request' is a good message to 'catch and fix'
# because its a user-supplied problem.
# Here we will attempt to 'fix' requests and put the 'add_floating_ip'
# task back on the queue after we're done.
celery_logger.exception("Neutron did not accept request - %s."
% bad_request.message)
if 'no fixed ip' in bad_request.message.lower():
fixed_ip = add_fixed_ip(driverCls, provider, identity,
instance_alias)
if fixed_ip:
celery_logger.debug("Fixed IP %s has been added to Instance %s."
% (fixed_ip, instance_alias))
# let the exception bubble-up for a retry..
except NeutronBadRequest:
# This is an error on our end, we want it to surface
raise
except (BaseException, Exception) as exc:
celery_logger.exception("Error occurred while assigning a floating IP")
Expand Down

0 comments on commit 5130640

Please sign in to comment.