[VCDA-2571] Use cpu/mem fields for native cluster operations (vmware#1181)

* Use cpu/mem fields for native cluster operations

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>

* Address review comment

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>

* Include CSE 3.1.1 in allowed upgrade paths

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>

* Address review comments

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>

* Address review comments

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>
Anirudh9794 authored Sep 3, 2021

1 parent 3c37748 commit caabbc0
Showing 6 changed files with 123 additions and 42 deletions.
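At a glance, the commit lets an RDE 2.0.0 cluster spec size native control-plane and worker nodes with explicit cpu/memory values as an alternative to a named sizing class; the two styles are mutually exclusive, as the new validator below enforces. A minimal sketch of the two worker-sizing styles, assuming the nested layout implied by the flattened keys in the first hunk; the dict shape is illustrative, not the full spec:

```python
# Two mutually exclusive ways to size workers under the new scheme.
# Field names follow the flattened keys added in this commit
# (topology.workers.cpu / topology.workers.memory); values are examples.
spec_with_sizing_class = {
    'topology': {
        'workers': {
            'count': 2,
            'sizingClass': 'small',      # named VM sizing policy
            'storageProfile': '*',
        }
    }
}

spec_with_explicit_resources = {
    'topology': {
        'workers': {
            'count': 2,
            'cpu': 4,          # vCPU count per worker VM
            'memory': 4096,    # memory per worker VM, in MB
            'storageProfile': '*',
        }
    }
}
```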
@@ -144,6 +144,8 @@ class FlattenedClusterSpecKey2X(Enum):
     WORKERS_COUNT = 'topology.workers.count'
     WORKERS_SIZING_CLASS = 'topology.workers.sizingClass'
     WORKERS_STORAGE_PROFILE = 'topology.workers.storageProfile'
+    WORKERS_CPU_COUNT = 'topology.workers.cpu'
+    WORKERS_MEMORY_MB = 'topology.workers.memory'
     NFS_COUNT = 'topology.nfs.count'
     NFS_SIZING_CLASS = 'topology.nfs.sizingClass'
     NFS_STORAGE_PROFILE = 'topology.nfs.storageProfile'
2 changes: 1 addition & 1 deletion container_service_extension/installer/configure_cse.py
@@ -730,7 +730,7 @@ def upgrade_cse(config_file_name, config, skip_template_creation,
     # CSE version info in extension description is only applicable for
     # CSE 3.0.0+ versions.
     allowed_source_cse_versions = \
-        semantic_version.SimpleSpec('>=3.0.0,<=3.1.0')
+        semantic_version.SimpleSpec('>=3.0.0,<=3.1.1')
     valid_source_cse_installation = \
         allowed_source_cse_versions.match(ext_cse_version)
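Since `SimpleSpec` bounds are inclusive, the one-character change above is what admits CSE 3.1.1 installations as upgrade sources. A quick check with the `semantic_version` library the installer already uses:

```python
import semantic_version

allowed = semantic_version.SimpleSpec('>=3.0.0,<=3.1.1')

print(allowed.match(semantic_version.Version('3.1.0')))  # True, allowed before and after
print(allowed.match(semantic_version.Version('3.1.1')))  # True, newly allowed source
print(allowed.match(semantic_version.Version('3.2.0')))  # False, above the upper bound
```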

39 changes: 36 additions & 3 deletions container_service_extension/rde/backend/cluster_service_2_x.py
@@ -843,7 +843,11 @@ def _create_cluster_async(self, cluster_id: str,
         ovdc_name = input_native_entity.metadata.virtual_data_center_name
         num_workers = input_native_entity.spec.topology.workers.count
         control_plane_sizing_class = input_native_entity.spec.topology.control_plane.sizing_class  # noqa: E501
+        control_plane_cpu_count = input_native_entity.spec.topology.control_plane.cpu  # noqa: E501
+        control_plane_memory_mb = input_native_entity.spec.topology.control_plane.memory  # noqa: E501
         worker_sizing_class = input_native_entity.spec.topology.workers.sizing_class  # noqa: E501
+        worker_cpu_count = input_native_entity.spec.topology.workers.cpu
+        worker_memory_mb = input_native_entity.spec.topology.workers.memory
         control_plane_storage_profile = input_native_entity.spec.topology.control_plane.storage_profile  # noqa: E501
         worker_storage_profile = input_native_entity.spec.topology.workers.storage_profile  # noqa: E501
         nfs_count = input_native_entity.spec.topology.nfs.count
@@ -917,6 +921,8 @@ def _create_cluster_async(self, cluster_id: str,
                 network_name=network_name,
                 storage_profile=control_plane_storage_profile,
                 ssh_key=ssh_key,
+                cpu_count=control_plane_cpu_count,
+                memory_mb=control_plane_memory_mb,
                 sizing_class_name=control_plane_sizing_class)
         except Exception as err:
             LOGGER.error(err, exc_info=True)
@@ -976,7 +982,9 @@ def _create_cluster_async(self, cluster_id: str,
                 network_name=network_name,
                 storage_profile=worker_storage_profile,
                 ssh_key=ssh_key,
-                sizing_class_name=worker_sizing_class)
+                sizing_class_name=worker_sizing_class,
+                cpu_count=worker_cpu_count,
+                memory_mb=worker_memory_mb)
         except Exception as err:
             LOGGER.error(err, exc_info=True)
             raise exceptions.WorkerNodeCreationError(
@@ -1365,6 +1373,8 @@ def _create_nodes_async(self, input_native_entity: rde_2_x.NativeEntity):
         # viz., template, storage_profile, and network among others.
         worker_storage_profile = input_native_entity.spec.topology.workers.storage_profile  # noqa: E501
         worker_sizing_class = input_native_entity.spec.topology.workers.sizing_class  # noqa: E501
+        worker_cpu_count = input_native_entity.spec.topology.workers.cpu
+        worker_memory_mb = input_native_entity.spec.topology.workers.memory
         nfs_storage_profile = input_native_entity.spec.topology.nfs.storage_profile  # noqa: E501
         nfs_sizing_class = input_native_entity.spec.topology.nfs.sizing_class  # noqa: E501
         network_name = input_native_entity.spec.settings.ovdc_network
@@ -1406,7 +1416,9 @@ def _create_nodes_async(self, input_native_entity: rde_2_x.NativeEntity):
                 network_name=network_name,
                 storage_profile=worker_storage_profile,
                 ssh_key=ssh_key,
-                sizing_class_name=worker_sizing_class)
+                sizing_class_name=worker_sizing_class,
+                cpu_count=worker_cpu_count,
+                memory_mb=worker_memory_mb)
             msg = f"Adding {num_workers_to_add} node(s) to cluster " \
                   f"{cluster_name}({cluster_id})"
             self._update_task(BehaviorTaskStatus.RUNNING, message=msg)
@@ -2110,17 +2122,24 @@ def _get_nodes_details(sysadmin_client, vapp):
                 policy_name = vm.ComputePolicy.VmSizingPolicy.get('name')
                 sizing_class = compute_policy_manager.\
                     get_cse_policy_display_name(policy_name)
+            vm_obj = vcd_vm.VM(sysadmin_client, resource=vm)
+            cpu_count = vm_obj.get_cpus()['num_cpus']
+            memory_mb = vm_obj.get_memory()
             storage_profile: Optional[str] = None
             if hasattr(vm, 'StorageProfile'):
                 storage_profile = vm.StorageProfile.get('name')
             if vm_name.startswith(NodeType.CONTROL_PLANE):
                 control_plane = rde_2_x.Node(name=vm_name, ip=ip,
                                              sizing_class=sizing_class,
+                                             cpu=cpu_count,
+                                             memory=memory_mb,
                                              storage_profile=storage_profile)
             elif vm_name.startswith(NodeType.WORKER):
                 workers.append(
                     rde_2_x.Node(name=vm_name, ip=ip,
                                  sizing_class=sizing_class,
+                                 cpu=cpu_count,
+                                 memory=memory_mb,
                                  storage_profile=storage_profile))
             elif vm_name.startswith(NodeType.NFS):
                 exports = None
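The status read path above pulls per-node resources straight off each VM. A condensed sketch of just that lookup, using the pyvcloud accessors visible in the hunk (`VM.get_cpus()` returning a dict with `'num_cpus'`, and `VM.get_memory()` returning MB); treat the exact return shapes as assumptions drawn from this diff rather than documented API:

```python
from pyvcloud.vcd.vm import VM

def read_node_resources(sysadmin_client, vm_resource):
    """Return (cpu_count, memory_mb) for one cluster node VM."""
    vm = VM(sysadmin_client, resource=vm_resource)
    cpu_count = vm.get_cpus()['num_cpus']  # per the hunk above
    memory_mb = vm.get_memory()            # memory in MB
    return cpu_count, memory_mb
```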
@@ -2323,9 +2342,14 @@ def _get_template(name=None, revision=None):

 def _add_nodes(sysadmin_client, num_nodes, node_type, org, vdc, vapp,
                catalog_name, template, network_name, storage_profile=None,
-               ssh_key=None, sizing_class_name=None):
+               ssh_key=None, sizing_class_name=None, cpu_count=None,
+               memory_mb=None):
     vcd_utils.raise_error_if_user_not_from_system_org(sysadmin_client)

+    if (cpu_count or memory_mb) and sizing_class_name:
+        raise exceptions.BadRequestError("Cannot specify both cpu/memory and "
+                                         "sizing class for control plane "
+                                         "node creation")
     if num_nodes > 0:
         specs = []
         try:
@@ -2418,6 +2442,15 @@ def _add_nodes(sysadmin_client, num_nodes, node_type, org, vdc, vapp,
             vm_resource = vapp.get_vm(vm_name)
             vm = vcd_vm.VM(sysadmin_client, resource=vm_resource)

+            if cpu_count and cpu_count > 0:
+                # updating cpu count on the VM
+                task = vm.modify_cpu(cpu_count)
+                sysadmin_client.get_task_monitor().wait_for_status(task)
+            if memory_mb and memory_mb > 0:
+                # updating memory
+                task = vm.modify_memory(memory_mb)
+                sysadmin_client.get_task_monitor().wait_for_status(task)
+
             task = vm.power_on()
             sysadmin_client.get_task_monitor().wait_for_status(task)
             vapp.reload()
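On the write path, each `modify_cpu`/`modify_memory` call returns an asynchronous vCD task, which is why `_add_nodes` waits on the task monitor before powering the VM on. A condensed sketch of that resize step under the same assumptions, with `sysadmin_client` and `vapp` set up as in the surrounding function:

```python
from pyvcloud.vcd.vm import VM

def resize_node(sysadmin_client, vapp, vm_name, cpu_count=None, memory_mb=None):
    """Apply explicit cpu/memory to a freshly cloned, still powered-off VM."""
    vm = VM(sysadmin_client, resource=vapp.get_vm(vm_name))
    if cpu_count and cpu_count > 0:
        # Reconfigure vCPU count and block until the vCD task settles.
        task = vm.modify_cpu(cpu_count)
        sysadmin_client.get_task_monitor().wait_for_status(task)
    if memory_mb and memory_mb > 0:
        # Reconfigure memory (in MB) the same way.
        task = vm.modify_memory(memory_mb)
        sysadmin_client.get_task_monitor().wait_for_status(task)
```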
66 changes: 38 additions & 28 deletions container_service_extension/rde/models/rde_2_0_0.py
@@ -413,13 +413,14 @@ def from_cluster_data(cls, cluster: dict, kind: str, **kwargs):
         site = kwargs.get('site', '')
         worker_nodes = []
         for item in cluster['nodes']:
-            worker_nodes.append(
-                Node(
-                    name=item['name'],
-                    ip=item['ipAddress'],
-                    storage_profile=cluster['storage_profile_name']
-                )
-            )
+            # Add cpu and memory details to the node part in status section
+            node = Node(
+                name=item['name'],
+                ip=item['ipAddress'],
+                storage_profile=cluster['storage_profile_name'],
+                cpu=int(item['numberOfCpus']),
+                memory=int(item['memoryMB']))
+            worker_nodes.append(node)
         nfs_nodes = []
         for item in cluster['nfs_nodes']:
             # The item['exports'] field is a string
@@ -430,7 +431,6 @@ def from_cluster_data(cls, cluster: dict, kind: str, **kwargs):
             exports_list_string = item['exports']
             exports_list_string.replace('[', '').replace(']', '').replace('\'', '')  # noqa: E501
             exports_list = exports_list_string.split(", ")
-
             nfs_nodes.append(
                 NfsNode(
                     name=item['name'],
@@ -451,20 +451,35 @@ def from_cluster_data(cls, cluster: dict, kind: str, **kwargs):
             ovdc_network_name=cluster['network_name'],
             distribution=k8_distribution,
             ssh_key='')
+        control_plane_nodes = cluster['master_nodes']
+        topology_control_plane = ControlPlane(
+            count=len(control_plane_nodes),
+            storage_profile=cluster['storage_profile_name']
+        )
+        workers = cluster.get('nodes', [])
+        topology_workers = Workers(
+            count=len(cluster['nodes']),
+            storage_profile=cluster['storage_profile_name']
+        )
+        if kind != shared_constants.ClusterEntityKind.TKG_M.value and len(workers) > 0:  # noqa: E501
+            topology_control_plane.cpu = int(control_plane_nodes[0]['numberOfCpus'])  # noqa: E501
+            topology_control_plane.memory = int(control_plane_nodes[0]['memoryMB'])  # noqa: E501
+            topology_workers.cpu = int(workers[0]['numberOfCpus'])
+            topology_workers.memory = int(workers[0]['memoryMB'])
         topology = Topology(
-            workers=Workers(
-                count=len(cluster['nodes']),
-                storage_profile=cluster['storage_profile_name']
-            ),
-            control_plane=ControlPlane(
-                count=len(cluster['master_nodes']),
-                storage_profile=cluster['storage_profile_name']
-            ),
+            workers=topology_workers,
+            control_plane=topology_control_plane,
             nfs=Nfs(
                 count=len(cluster['nfs_nodes']),
                 storage_profile=cluster['storage_profile_name']
             )
         )
+        node_control_plane = Node(
+            name=cluster['master_nodes'][0]['name'],
+            ip=cluster['master_nodes'][0]['ipAddress'],
+            storage_profile=cluster['storage_profile_name'],
+            cpu=int(control_plane_nodes[0]['numberOfCpus']),
+            memory=int(control_plane_nodes[0]['memoryMB']))
         cluster_entity = cls(
             kind=kind,
             spec=ClusterSpec(
@@ -485,11 +500,7 @@ def from_cluster_data(cls, cluster: dict, kind: str, **kwargs):
                 os=cluster['os'],
                 docker_version=cluster['docker_version'],
                 nodes=Nodes(
-                    control_plane=Node(
-                        name=cluster['master_nodes'][0]['name'],
-                        ip=cluster['master_nodes'][0]['ipAddress'],
-                        storage_profile=cluster['storage_profile_name']
-                    ),
+                    control_plane=node_control_plane,
                     workers=worker_nodes,
                     nfs=nfs_nodes
                 ),
@@ -605,13 +616,12 @@ def get_sample_native_cluster_specification(cls, k8_runtime: str = shared_consta
         del native_entity_dict['spec']['settings']['network']['cni']
         del native_entity_dict['spec']['settings']['network']['pods']
         del native_entity_dict['spec']['settings']['network']['services']
-        # Hiding the cpu and memory properties from controlPlane and workers
-        # for Andromeda (CSE 3.1). Below lines can be deleted once cpu and
-        # memory support is added in CSE 3.1.1
-        del native_entity_dict['spec']['topology']['controlPlane']['cpu']
-        del native_entity_dict['spec']['topology']['controlPlane']['memory']
-        del native_entity_dict['spec']['topology']['workers']['cpu']
-        del native_entity_dict['spec']['topology']['workers']['memory']

+        if k8_runtime == shared_constants.ClusterEntityKind.TKG_M.value:
+            del native_entity_dict['spec']['topology']['controlPlane']['cpu']
+            del native_entity_dict['spec']['topology']['controlPlane']['memory']  # noqa: E501
+            del native_entity_dict['spec']['topology']['workers']['cpu']
+            del native_entity_dict['spec']['topology']['workers']['memory']
+
         sample_apply_spec = yaml.dump(native_entity_dict)
         return cluster_spec_field_descriptions + sample_apply_spec
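The `int(...)` coercions in `from_cluster_data` above suggest `numberOfCpus`/`memoryMB` arrive as strings (or other non-int values) in the raw cluster records. A self-contained sketch of that mapping, with a stand-in dataclass for `rde_2_0_0.Node`; names and record values are illustrative:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Node:  # stand-in for rde_2_0_0.Node, illustration only
    name: str
    ip: str
    storage_profile: Optional[str] = None
    sizing_class: Optional[str] = None
    cpu: Optional[int] = None
    memory: Optional[int] = None

raw = {'name': 'node-abcd', 'ipAddress': '10.0.0.5',
       'numberOfCpus': '2', 'memoryMB': '2048'}  # values assumed string-typed

node = Node(name=raw['name'],
            ip=raw['ipAddress'],
            storage_profile='*',
            cpu=int(raw['numberOfCpus']),    # coerce as the diff does
            memory=int(raw['memoryMB']))
```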
38 changes: 30 additions & 8 deletions container_service_extension/rde/utils.py
@@ -106,21 +106,43 @@ def construct_2_0_0_cluster_spec_from_entity_status(entity_status: rde_2_0_0.Sta
     :return: Cluster Specification as defined in rde_2_0_0 model
     """
     # Currently only single control-plane is supported.
-    control_plane = rde_2_0_0.ControlPlane(
-        sizing_class=entity_status.nodes.control_plane.sizing_class,
-        storage_profile=entity_status.nodes.control_plane.storage_profile,
-        count=1)
+    if entity_status.nodes.control_plane.sizing_class:
+        control_plane = rde_2_0_0.ControlPlane(
+            sizing_class=entity_status.nodes.control_plane.sizing_class,
+            storage_profile=entity_status.nodes.control_plane.storage_profile,
+            cpu=None,
+            memory=None,
+            count=1)
+    else:
+        control_plane = rde_2_0_0.ControlPlane(
+            sizing_class=None,
+            storage_profile=entity_status.nodes.control_plane.storage_profile,
+            cpu=entity_status.nodes.control_plane.cpu,
+            memory=entity_status.nodes.control_plane.memory,
+            count=1)

     workers_count = len(entity_status.nodes.workers)
     if workers_count == 0:
         workers = rde_2_0_0.Workers(sizing_class=None,
+                                    cpu=None,
+                                    memory=None,
                                     storage_profile=None,
                                     count=0)
     else:
-        workers = rde_2_0_0.Workers(
-            sizing_class=entity_status.nodes.workers[0].sizing_class,
-            storage_profile=entity_status.nodes.workers[0].storage_profile,
-            count=workers_count)
+        if entity_status.nodes.workers[0].sizing_class:
+            workers = rde_2_0_0.Workers(
+                sizing_class=entity_status.nodes.workers[0].sizing_class,
+                cpu=None,
+                memory=None,
+                storage_profile=entity_status.nodes.workers[0].storage_profile,
+                count=workers_count)
+        else:
+            workers = rde_2_0_0.Workers(
+                sizing_class=None,
+                storage_profile=entity_status.nodes.workers[0].storage_profile,
+                cpu=entity_status.nodes.workers[0].cpu,
+                memory=entity_status.nodes.workers[0].memory,
+                count=workers_count)

     nfs_count = len(entity_status.nodes.nfs)
     if nfs_count == 0:
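The branching in `construct_2_0_0_cluster_spec_from_entity_status` reduces to one rule: a reconstructed spec carries either the sizing class or explicit cpu/memory, never both. A hedged consolidation of that rule as a hypothetical helper (not part of the commit); it only assumes a node object with `sizing_class`/`cpu`/`memory` attributes, as in the rde_2_0_0 models:

```python
from types import SimpleNamespace

def sizing_or_resources(node):
    """Map a status node to ControlPlane/Workers sizing kwargs: one or the other."""
    if node.sizing_class:
        return {'sizing_class': node.sizing_class, 'cpu': None, 'memory': None}
    return {'sizing_class': None, 'cpu': node.cpu, 'memory': node.memory}

node = SimpleNamespace(sizing_class=None, cpu=2, memory=2048)
print(sizing_or_resources(node))  # {'sizing_class': None, 'cpu': 2, 'memory': 2048}
```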
18 changes: 16 additions & 2 deletions container_service_extension/rde/validators/validator_rde_2_x.py
@@ -21,7 +21,7 @@ def __init__(self):
         pass

     def validate(self, cloudapi_client: CloudApiClient, entity_id: str = None,
-                 entity: dict = None, operation: BehaviorOperation = None) -> bool:  # noqa: E501
+                 entity: dict = None, operation: BehaviorOperation = BehaviorOperation.CREATE_CLUSTER) -> bool:  # noqa: E501
         """Validate the input request.

         This method performs
@@ -65,7 +65,19 @@ def validate(self, cloudapi_client: CloudApiClient, entity_id: str = None,
             raise BadRequestError(msg)

         # Return True if the operation is not specified.
-        if not operation:
+        if operation == BehaviorOperation.CREATE_CLUSTER:
+            # Need to ensure that sizing class along with cpu/memory is not
+            # present in the request
+            bad_request_msg = ""
+            if isinstance(input_entity, rde_2_0_0.NativeEntity):
+                # cpu and mem are properties of only rde 2.0.0
+                if input_entity.spec.topology.workers.sizing_class and \
+                        (input_entity.spec.topology.workers.cpu or input_entity.spec.topology.workers.memory):  # noqa: E501
+                    bad_request_msg = "Cannot specify both sizing class and cpu/memory for Workers nodes."  # noqa: E501
+                if input_entity.spec.topology.control_plane.sizing_class and (input_entity.spec.topology.control_plane.cpu or input_entity.spec.topology.control_plane.memory):  # noqa: E501
+                    bad_request_msg = "Cannot specify both sizing class and cpu/memory for Control Plane nodes."  # noqa: E501
+            if bad_request_msg:
+                raise BadRequestError(bad_request_msg)
             return True

         # TODO: validators for rest of the CSE operations in V36 will be
@@ -105,6 +117,8 @@ def validate_cluster_update_request_and_check_cluster_upgrade(input_spec: rde_2_
         # validation if worker count is 0
         exclude_fields.append(FlattenedClusterSpecKey2X.WORKERS_SIZING_CLASS.value)  # noqa: E501
         exclude_fields.append(FlattenedClusterSpecKey2X.WORKERS_STORAGE_PROFILE.value)  # noqa: E501
+        exclude_fields.append(FlattenedClusterSpecKey2X.WORKERS_CPU_COUNT.value)  # noqa: E501
+        exclude_fields.append(FlattenedClusterSpecKey2X.WORKERS_MEMORY_MB.value)  # noqa: E501
     if reference_spec.topology.nfs.count == 0:
         # Exclude nfs nodes' sizing class and storage profile from validation
         # if nfs count is 0
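The exclusions above keep a cluster update from being rejected over worker fields that are meaningless at zero workers. A minimal, hypothetical sketch of how excluded flattened keys drop out of the spec comparison; this stands in for CSE's actual diff routine, which is not shown in this commit:

```python
def changed_fields(current: dict, requested: dict, exclude: set) -> set:
    """Compare two flattened specs, ignoring excluded keys."""
    keys = (current.keys() | requested.keys()) - exclude
    return {k for k in keys if current.get(k) != requested.get(k)}

exclude = {'topology.workers.sizingClass', 'topology.workers.storageProfile',
           'topology.workers.cpu', 'topology.workers.memory'}
current = {'topology.workers.count': 0, 'topology.workers.cpu': None}
requested = {'topology.workers.count': 0, 'topology.workers.cpu': 2}
print(changed_fields(current, requested, exclude))  # set() -> update passes validation
```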
