[VCDA-2571] Use cpu/mem fields for native cluster operations (vmware#1181)

* Use cpu/mem fields for native cluster operations

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>

* Address review comment

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>

* Include CSE 3.1.1 in allowed upgrade paths

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>

* Address review comments

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>

* Address review comments

Signed-off-by: Aniruddha Shamasundar <aniruddha.9794@gmail.com>
Anirudh9794 authored Sep 3, 2021

1 parent 3c37748 commit caabbc0
Showing 6 changed files with 123 additions and 42 deletions.
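At a glance, the commit lets an RDE 2.0.0 cluster spec size native control-plane and worker nodes with explicit cpu/memory values as an alternative to a named sizing class; the two styles are mutually exclusive, as the new validator below enforces. A minimal sketch of the two worker-sizing styles, assuming the nested layout implied by the flattened keys in the first hunk; the dict shape is illustrative, not the full spec:

```python
# Two mutually exclusive ways to size workers under the new scheme.
# Field names follow the flattened keys added in this commit
# (topology.workers.cpu / topology.workers.memory); values are examples.
spec_with_sizing_class = {
    'topology': {
        'workers': {
            'count': 2,
            'sizingClass': 'small',      # named VM sizing policy
            'storageProfile': '*',
        }
    }
}

spec_with_explicit_resources = {
    'topology': {
        'workers': {
            'count': 2,
            'cpu': 4,          # vCPU count per worker VM
            'memory': 4096,    # memory per worker VM, in MB
            'storageProfile': '*',
        }
    }
}
```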
@@ -144,6 +144,8 @@ class FlattenedClusterSpecKey2X(Enum):
     WORKERS_COUNT = 'topology.workers.count'
     WORKERS_SIZING_CLASS = 'topology.workers.sizingClass'
     WORKERS_STORAGE_PROFILE = 'topology.workers.storageProfile'
+    WORKERS_CPU_COUNT = 'topology.workers.cpu'
+    WORKERS_MEMORY_MB = 'topology.workers.memory'
     NFS_COUNT = 'topology.nfs.count'
     NFS_SIZING_CLASS = 'topology.nfs.sizingClass'
     NFS_STORAGE_PROFILE = 'topology.nfs.storageProfile'
2 changes: 1 addition & 1 deletion container_service_extension/installer/configure_cse.py
@@ -730,7 +730,7 @@ def upgrade_cse(config_file_name, config, skip_template_creation,
     # CSE version info in extension description is only applicable for
     # CSE 3.0.0+ versions.
     allowed_source_cse_versions = \
-        semantic_version.SimpleSpec('>=3.0.0,<=3.1.0')
+        semantic_version.SimpleSpec('>=3.0.0,<=3.1.1')
     valid_source_cse_installation = \
         allowed_source_cse_versions.match(ext_cse_version)
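Since `SimpleSpec` bounds are inclusive, the one-character change above is what admits CSE 3.1.1 installations as upgrade sources. A quick check with the `semantic_version` library the installer already uses:

```python
import semantic_version

allowed = semantic_version.SimpleSpec('>=3.0.0,<=3.1.1')

print(allowed.match(semantic_version.Version('3.1.0')))  # True, allowed before and after
print(allowed.match(semantic_version.Version('3.1.1')))  # True, newly allowed source
print(allowed.match(semantic_version.Version('3.2.0')))  # False, above the upper bound
```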

39 changes: 36 additions & 3 deletions container_service_extension/rde/backend/cluster_service_2_x.py
@@ -843,7 +843,11 @@ def _create_cluster_async(self, cluster_id: str,
         ovdc_name = input_native_entity.metadata.virtual_data_center_name
         num_workers = input_native_entity.spec.topology.workers.count
         control_plane_sizing_class = input_native_entity.spec.topology.control_plane.sizing_class  # noqa: E501
+        control_plane_cpu_count = input_native_entity.spec.topology.control_plane.cpu  # noqa: E501
+        control_plane_memory_mb = input_native_entity.spec.topology.control_plane.memory  # noqa: E501
         worker_sizing_class = input_native_entity.spec.topology.workers.sizing_class  # noqa: E501
+        worker_cpu_count = input_native_entity.spec.topology.workers.cpu
+        worker_memory_mb = input_native_entity.spec.topology.workers.memory
         control_plane_storage_profile = input_native_entity.spec.topology.control_plane.storage_profile  # noqa: E501
         worker_storage_profile = input_native_entity.spec.topology.workers.storage_profile  # noqa: E501
         nfs_count = input_native_entity.spec.topology.nfs.count
@@ -917,6 +921,8 @@ def _create_cluster_async(self, cluster_id: str,
                 network_name=network_name,
                 storage_profile=control_plane_storage_profile,
                 ssh_key=ssh_key,
+                cpu_count=control_plane_cpu_count,
+                memory_mb=control_plane_memory_mb,
                 sizing_class_name=control_plane_sizing_class)
         except Exception as err:
             LOGGER.error(err, exc_info=True)
@@ -976,7 +982,9 @@ def _create_cluster_async(self, cluster_id: str,
                 network_name=network_name,
                 storage_profile=worker_storage_profile,
                 ssh_key=ssh_key,
-                sizing_class_name=worker_sizing_class)
+                sizing_class_name=worker_sizing_class,
+                cpu_count=worker_cpu_count,
+                memory_mb=worker_memory_mb)
         except Exception as err:
             LOGGER.error(err, exc_info=True)
             raise exceptions.WorkerNodeCreationError(
@@ -1365,6 +1373,8 @@ def _create_nodes_async(self, input_native_entity: rde_2_x.NativeEntity):
         # viz., template, storage_profile, and network among others.
         worker_storage_profile = input_native_entity.spec.topology.workers.storage_profile  # noqa: E501
         worker_sizing_class = input_native_entity.spec.topology.workers.sizing_class  # noqa: E501
+        worker_cpu_count = input_native_entity.spec.topology.workers.cpu
+        worker_memory_mb = input_native_entity.spec.topology.workers.memory
         nfs_storage_profile = input_native_entity.spec.topology.nfs.storage_profile  # noqa: E501
         nfs_sizing_class = input_native_entity.spec.topology.nfs.sizing_class  # noqa: E501
         network_name = input_native_entity.spec.settings.ovdc_network
@@ -1406,7 +1416,9 @@ def _create_nodes_async(self, input_native_entity: rde_2_x.NativeEntity):
                 network_name=network_name,
                 storage_profile=worker_storage_profile,
                 ssh_key=ssh_key,
-                sizing_class_name=worker_sizing_class)
+                sizing_class_name=worker_sizing_class,
+                cpu_count=worker_cpu_count,
+                memory_mb=worker_memory_mb)
             msg = f"Adding {num_workers_to_add} node(s) to cluster " \
                   f"{cluster_name}({cluster_id})"
             self._update_task(BehaviorTaskStatus.RUNNING, message=msg)
@@ -2110,17 +2122,24 @@ def _get_nodes_details(sysadmin_client, vapp):
                 policy_name = vm.ComputePolicy.VmSizingPolicy.get('name')
                 sizing_class = compute_policy_manager.\
                     get_cse_policy_display_name(policy_name)
+            vm_obj = vcd_vm.VM(sysadmin_client, resource=vm)
+            cpu_count = vm_obj.get_cpus()['num_cpus']
+            memory_mb = vm_obj.get_memory()
             storage_profile: Optional[str] = None
             if hasattr(vm, 'StorageProfile'):
                 storage_profile = vm.StorageProfile.get('name')
             if vm_name.startswith(NodeType.CONTROL_PLANE):
                 control_plane = rde_2_x.Node(name=vm_name, ip=ip,
                                              sizing_class=sizing_class,
+                                             cpu=cpu_count,
+                                             memory=memory_mb,
                                              storage_profile=storage_profile)
             elif vm_name.startswith(NodeType.WORKER):
                 workers.append(
                     rde_2_x.Node(name=vm_name, ip=ip,
                                  sizing_class=sizing_class,
+                                 cpu=cpu_count,
+                                 memory=memory_mb,
                                  storage_profile=storage_profile))
             elif vm_name.startswith(NodeType.NFS):
                 exports = None
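The status read path above pulls per-node resources straight off each VM. A condensed sketch of just that lookup, using the pyvcloud accessors visible in the hunk (`VM.get_cpus()` returning a dict with `'num_cpus'`, and `VM.get_memory()` returning MB); treat the exact return shapes as assumptions drawn from this diff rather than documented API:

```python
from pyvcloud.vcd.vm import VM

def read_node_resources(sysadmin_client, vm_resource):
    """Return (cpu_count, memory_mb) for one cluster node VM."""
    vm = VM(sysadmin_client, resource=vm_resource)
    cpu_count = vm.get_cpus()['num_cpus']  # per the hunk above
    memory_mb = vm.get_memory()            # memory in MB
    return cpu_count, memory_mb
```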
@@ -2323,9 +2342,14 @@ def _get_template(name=None, revision=None):

 def _add_nodes(sysadmin_client, num_nodes, node_type, org, vdc, vapp,
                catalog_name, template, network_name, storage_profile=None,
-               ssh_key=None, sizing_class_name=None):
+               ssh_key=None, sizing_class_name=None, cpu_count=None,
+               memory_mb=None):
     vcd_utils.raise_error_if_user_not_from_system_org(sysadmin_client)

+    if (cpu_count or memory_mb) and sizing_class_name:
+        raise exceptions.BadRequestError("Cannot specify both cpu/memory and "
+                                         "sizing class for control plane "
+                                         "node creation")
     if num_nodes > 0:
         specs = []
         try:
@@ -2418,6 +2442,15 @@ def _add_nodes(sysadmin_client, num_nodes, node_type, org, vdc, vapp,
             vm_resource = vapp.get_vm(vm_name)
             vm = vcd_vm.VM(sysadmin_client, resource=vm_resource)

+            if cpu_count and cpu_count > 0:
+                # updating cpu count on the VM
+                task = vm.modify_cpu(cpu_count)
+                sysadmin_client.get_task_monitor().wait_for_status(task)
+            if memory_mb and memory_mb > 0:
+                # updating memory
+                task = vm.modify_memory(memory_mb)
+                sysadmin_client.get_task_monitor().wait_for_status(task)
+
             task = vm.power_on()
             sysadmin_client.get_task_monitor().wait_for_status(task)
             vapp.reload()
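On the write path, each `modify_cpu`/`modify_memory` call returns an asynchronous vCD task, which is why `_add_nodes` waits on the task monitor before powering the VM on. A condensed sketch of that resize step under the same assumptions, with `sysadmin_client` and `vapp` set up as in the surrounding function:

```python
from pyvcloud.vcd.vm import VM

def resize_node(sysadmin_client, vapp, vm_name, cpu_count=None, memory_mb=None):
    """Apply explicit cpu/memory to a freshly cloned, still powered-off VM."""
    vm = VM(sysadmin_client, resource=vapp.get_vm(vm_name))
    if cpu_count and cpu_count > 0:
        # Reconfigure vCPU count and block until the vCD task settles.
        task = vm.modify_cpu(cpu_count)
        sysadmin_client.get_task_monitor().wait_for_status(task)
    if memory_mb and memory_mb > 0:
        # Reconfigure memory (in MB) the same way.
        task = vm.modify_memory(memory_mb)
        sysadmin_client.get_task_monitor().wait_for_status(task)
```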
66 changes: 38 additions & 28 deletions container_service_extension/rde/models/rde_2_0_0.py
@@ -413,13 +413,14 @@ def from_cluster_data(cls, cluster: dict, kind: str, **kwargs):
         site = kwargs.get('site', '')
         worker_nodes = []
         for item in cluster['nodes']:
-            worker_nodes.append(
-                Node(
-                    name=item['name'],
-                    ip=item['ipAddress'],
-                    storage_profile=cluster['storage_profile_name']
-                )
-            )
+            # Add cpu and memory details to the node part in status section
+            node = Node(
+                name=item['name'],
+                ip=item['ipAddress'],
+                storage_profile=cluster['storage_profile_name'],
+                cpu=int(item['numberOfCpus']),
+                memory=int(item['memoryMB']))
+            worker_nodes.append(node)
         nfs_nodes = []
         for item in cluster['nfs_nodes']:
             # The item['exports'] field is a string
@@ -430,7 +431,6 @@ def from_cluster_data(cls, cluster: dict, kind: str, **kwargs):
             exports_list_string = item['exports']
             exports_list_string.replace('[', '').replace(']', '').replace('\'', '')  # noqa: E501
             exports_list = exports_list_string.split(", ")
-
             nfs_nodes.append(
                 NfsNode(
                     name=item['name'],
@@ -451,20 +451,35 @@ def from_cluster_data(cls, cluster: dict, kind: str, **kwargs):
             ovdc_network_name=cluster['network_name'],
             distribution=k8_distribution,
             ssh_key='')
+        control_plane_nodes = cluster['master_nodes']
+        topology_control_plane = ControlPlane(
+            count=len(control_plane_nodes),
+            storage_profile=cluster['storage_profile_name']
+        )
+        workers = cluster.get('nodes', [])
+        topology_workers = Workers(
+            count=len(cluster['nodes']),
+            storage_profile=cluster['storage_profile_name']
+        )
+        if kind != shared_constants.ClusterEntityKind.TKG_M.value and len(workers) > 0:  # noqa: E501
+            topology_control_plane.cpu = int(control_plane_nodes[0]['numberOfCpus'])  # noqa: E501
+            topology_control_plane.memory = int(control_plane_nodes[0]['memoryMB'])  # noqa: E501
+            topology_workers.cpu = int(workers[0]['numberOfCpus'])
+            topology_workers.memory = int(workers[0]['memoryMB'])
         topology = Topology(
-            workers=Workers(
-                count=len(cluster['nodes']),
-                storage_profile=cluster['storage_profile_name']
-            ),
-            control_plane=ControlPlane(
-                count=len(cluster['master_nodes']),
-                storage_profile=cluster['storage_profile_name']
-            ),
+            workers=topology_workers,
+            control_plane=topology_control_plane,
             nfs=Nfs(
                 count=len(cluster['nfs_nodes']),
                 storage_profile=cluster['storage_profile_name']
             )
         )
+        node_control_plane = Node(
+            name=cluster['master_nodes'][0]['name'],
+            ip=cluster['master_nodes'][0]['ipAddress'],
+            storage_profile=cluster['storage_profile_name'],
+            cpu=int(control_plane_nodes[0]['numberOfCpus']),
+            memory=int(control_plane_nodes[0]['memoryMB']))
         cluster_entity = cls(
             kind=kind,
             spec=ClusterSpec(
@@ -485,11 +500,7 @@ def from_cluster_data(cls, cluster: dict, kind: str, **kwargs):
                 os=cluster['os'],
                 docker_version=cluster['docker_version'],
                 nodes=Nodes(
-                    control_plane=Node(
-                        name=cluster['master_nodes'][0]['name'],
-                        ip=cluster['master_nodes'][0]['ipAddress'],
-                        storage_profile=cluster['storage_profile_name']
-                    ),
+                    control_plane=node_control_plane,
                     workers=worker_nodes,
                     nfs=nfs_nodes
                 ),
@@ -605,13 +616,12 @@ def get_sample_native_cluster_specification(cls, k8_runtime: str = shared_consta
         del native_entity_dict['spec']['settings']['network']['cni']
         del native_entity_dict['spec']['settings']['network']['pods']
         del native_entity_dict['spec']['settings']['network']['services']
-        # Hiding the cpu and memory properties from controlPlane and workers
-        # for Andromeda (CSE 3.1). Below lines can be deleted once cpu and
-        # memory support is added in CSE 3.1.1
-        del native_entity_dict['spec']['topology']['controlPlane']['cpu']
-        del native_entity_dict['spec']['topology']['controlPlane']['memory']
-        del native_entity_dict['spec']['topology']['workers']['cpu']
-        del native_entity_dict['spec']['topology']['workers']['memory']

+        if k8_runtime == shared_constants.ClusterEntityKind.TKG_M.value:
+            del native_entity_dict['spec']['topology']['controlPlane']['cpu']
+            del native_entity_dict['spec']['topology']['controlPlane']['memory']  # noqa: E501
+            del native_entity_dict['spec']['topology']['workers']['cpu']
+            del native_entity_dict['spec']['topology']['workers']['memory']
+
         sample_apply_spec = yaml.dump(native_entity_dict)
         return cluster_spec_field_descriptions + sample_apply_spec
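The `int(...)` coercions in `from_cluster_data` above suggest `numberOfCpus`/`memoryMB` arrive as strings (or other non-int values) in the raw cluster records. A self-contained sketch of that mapping, with a stand-in dataclass for `rde_2_0_0.Node`; names and record values are illustrative:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Node:  # stand-in for rde_2_0_0.Node, illustration only
    name: str
    ip: str
    storage_profile: Optional[str] = None
    sizing_class: Optional[str] = None
    cpu: Optional[int] = None
    memory: Optional[int] = None

raw = {'name': 'node-abcd', 'ipAddress': '10.0.0.5',
       'numberOfCpus': '2', 'memoryMB': '2048'}  # values assumed string-typed

node = Node(name=raw['name'],
            ip=raw['ipAddress'],
            storage_profile='*',
            cpu=int(raw['numberOfCpus']),    # coerce as the diff does
            memory=int(raw['memoryMB']))
```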
38 changes: 30 additions & 8 deletions container_service_extension/rde/utils.py
@@ -106,21 +106,43 @@ def construct_2_0_0_cluster_spec_from_entity_status(entity_status: rde_2_0_0.Sta
     :return: Cluster Specification as defined in rde_2_0_0 model
     """
     # Currently only single control-plane is supported.
-    control_plane = rde_2_0_0.ControlPlane(
-        sizing_class=entity_status.nodes.control_plane.sizing_class,
-        storage_profile=entity_status.nodes.control_plane.storage_profile,
-        count=1)
+    if entity_status.nodes.control_plane.sizing_class:
+        control_plane = rde_2_0_0.ControlPlane(
+            sizing_class=entity_status.nodes.control_plane.sizing_class,
+            storage_profile=entity_status.nodes.control_plane.storage_profile,
+            cpu=None,
+            memory=None,
+            count=1)
+    else:
+        control_plane = rde_2_0_0.ControlPlane(
+            sizing_class=None,
+            storage_profile=entity_status.nodes.control_plane.storage_profile,
+            cpu=entity_status.nodes.control_plane.cpu,
+            memory=entity_status.nodes.control_plane.memory,
+            count=1)

     workers_count = len(entity_status.nodes.workers)
     if workers_count == 0:
         workers = rde_2_0_0.Workers(sizing_class=None,
+                                    cpu=None,
+                                    memory=None,
                                     storage_profile=None,
                                     count=0)
     else:
-        workers = rde_2_0_0.Workers(
-            sizing_class=entity_status.nodes.workers[0].sizing_class,
-            storage_profile=entity_status.nodes.workers[0].storage_profile,
-            count=workers_count)
+        if entity_status.nodes.workers[0].sizing_class:
+            workers = rde_2_0_0.Workers(
+                sizing_class=entity_status.nodes.workers[0].sizing_class,
+                cpu=None,
+                memory=None,
+                storage_profile=entity_status.nodes.workers[0].storage_profile,
+                count=workers_count)
+        else:
+            workers = rde_2_0_0.Workers(
+                sizing_class=None,
+                storage_profile=entity_status.nodes.workers[0].storage_profile,
+                cpu=entity_status.nodes.workers[0].cpu,
+                memory=entity_status.nodes.workers[0].memory,
+                count=workers_count)

     nfs_count = len(entity_status.nodes.nfs)
     if nfs_count == 0:
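The branching in `construct_2_0_0_cluster_spec_from_entity_status` reduces to one rule: a reconstructed spec carries either the sizing class or explicit cpu/memory, never both. A hedged consolidation of that rule as a hypothetical helper (not part of the commit); it only assumes a node object with `sizing_class`/`cpu`/`memory` attributes, as in the rde_2_0_0 models:

```python
from types import SimpleNamespace

def sizing_or_resources(node):
    """Map a status node to ControlPlane/Workers sizing kwargs: one or the other."""
    if node.sizing_class:
        return {'sizing_class': node.sizing_class, 'cpu': None, 'memory': None}
    return {'sizing_class': None, 'cpu': node.cpu, 'memory': node.memory}

node = SimpleNamespace(sizing_class=None, cpu=2, memory=2048)
print(sizing_or_resources(node))  # {'sizing_class': None, 'cpu': 2, 'memory': 2048}
```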
18 changes: 16 additions & 2 deletions container_service_extension/rde/validators/validator_rde_2_x.py
@@ -21,7 +21,7 @@ def __init__(self):
         pass

     def validate(self, cloudapi_client: CloudApiClient, entity_id: str = None,
-                 entity: dict = None, operation: BehaviorOperation = None) -> bool:  # noqa: E501
+                 entity: dict = None, operation: BehaviorOperation = BehaviorOperation.CREATE_CLUSTER) -> bool:  # noqa: E501
         """Validate the input request.

         This method performs
@@ -65,7 +65,19 @@ def validate(self, cloudapi_client: CloudApiClient, entity_id: str = None,
             raise BadRequestError(msg)

         # Return True if the operation is not specified.
-        if not operation:
+        if operation == BehaviorOperation.CREATE_CLUSTER:
+            # Need to ensure that sizing class along with cpu/memory is not
+            # present in the request
+            bad_request_msg = ""
+            if isinstance(input_entity, rde_2_0_0.NativeEntity):
+                # cpu and mem are properties of only rde 2.0.0
+                if input_entity.spec.topology.workers.sizing_class and \
+                        (input_entity.spec.topology.workers.cpu or input_entity.spec.topology.workers.memory):  # noqa: E501
+                    bad_request_msg = "Cannot specify both sizing class and cpu/memory for Workers nodes."  # noqa: E501
+                if input_entity.spec.topology.control_plane.sizing_class and (input_entity.spec.topology.control_plane.cpu or input_entity.spec.topology.control_plane.memory):  # noqa: E501
+                    bad_request_msg = "Cannot specify both sizing class and cpu/memory for Control Plane nodes."  # noqa: E501
+            if bad_request_msg:
+                raise BadRequestError(bad_request_msg)
             return True

         # TODO: validators for rest of the CSE operations in V36 will be
@@ -105,6 +117,8 @@ def validate_cluster_update_request_and_check_cluster_upgrade(input_spec: rde_2_
         # validation if worker count is 0
         exclude_fields.append(FlattenedClusterSpecKey2X.WORKERS_SIZING_CLASS.value)  # noqa: E501
         exclude_fields.append(FlattenedClusterSpecKey2X.WORKERS_STORAGE_PROFILE.value)  # noqa: E501
+        exclude_fields.append(FlattenedClusterSpecKey2X.WORKERS_CPU_COUNT.value)  # noqa: E501
+        exclude_fields.append(FlattenedClusterSpecKey2X.WORKERS_MEMORY_MB.value)  # noqa: E501
     if reference_spec.topology.nfs.count == 0:
         # Exclude nfs nodes' sizing class and storage profile from validation
         # if nfs count is 0
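The exclusions above keep a cluster update from being rejected over worker fields that are meaningless at zero workers. A minimal, hypothetical sketch of how excluded flattened keys drop out of the spec comparison; this stands in for CSE's actual diff routine, which is not shown in this commit:

```python
def changed_fields(current: dict, requested: dict, exclude: set) -> set:
    """Compare two flattened specs, ignoring excluded keys."""
    keys = (current.keys() | requested.keys()) - exclude
    return {k for k in keys if current.get(k) != requested.get(k)}

exclude = {'topology.workers.sizingClass', 'topology.workers.storageProfile',
           'topology.workers.cpu', 'topology.workers.memory'}
current = {'topology.workers.count': 0, 'topology.workers.cpu': None}
requested = {'topology.workers.count': 0, 'topology.workers.cpu': 2}
print(changed_fields(current, requested, exclude))  # set() -> update passes validation
```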
