feat(vertexai): Support google_vertex_ai_deployment_resource_pool (Go…

…ogleCloudPlatform#9518)
Cheriit · Jun 4, 2024 · 49a80c0 · 49a80c0
1 parent 4b20a74
commit 49a80c0
Show file tree

Hide file tree

Showing 3 changed files with 201 additions and 0 deletions.
diff --git a/mmv1/products/vertexai/DeploymentResourcePool.yaml b/mmv1/products/vertexai/DeploymentResourcePool.yaml
@@ -0,0 +1,161 @@
+# Copyright 2024 Google Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+--- !ruby/object:Api::Resource
+name: 'DeploymentResourcePool'
+base_url: 'projects/{{project}}/locations/{{region}}/deploymentResourcePools'
+create_url: 'projects/{{project}}/locations/{{region}}/deploymentResourcePools'
+self_link: 'projects/{{project}}/locations/{{region}}/deploymentResourcePools/{{name}}'
+# No method is not defined for DeploymentResourcePools
+immutable: true
+create_verb: :POST
+references: !ruby/object:Api::Resource::ReferenceLinks
+  api: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.deploymentResourcePools
+async: !ruby/object:Api::OpAsync
+  actions:
+    - create
+    - delete
+  operation: !ruby/object:Api::OpAsync::Operation
+    path: 'name'
+    base_url: '{{op_id}}'
+    wait_ms: 1000
+  result: !ruby/object:Api::OpAsync::Result
+    path: 'response'
+    resource_inside_response: true
+  status: !ruby/object:Api::OpAsync::Status
+    path: 'done'
+    complete: true
+    allowed:
+      - true
+      - false
+  error: !ruby/object:Api::OpAsync::Error
+    path: 'error'
+    message: 'message'
+description: |-
+  'DeploymentResourcePool can be shared by multiple deployed models,
+  whose underlying specification consists of dedicated resources.'
+custom_code: !ruby/object:Provider::Terraform::CustomCode
+  encoder: templates/terraform/encoders/wrap_object_with_deployment_resource_pool_id.go.erb
+examples:
+  - !ruby/object:Provider::Terraform::Examples
+    name: 'vertex_ai_deployment_resource_pool'
+    primary_resource_id: 'deployment_resource_pool'
+    vars:
+      deployment_resource_pool_id: example-deployment-resource-pool
+parameters:
+  - !ruby/object:Api::Type::String
+    name: region
+    description: The region of deployment resource pool. eg us-central1
+    url_param_only: true
+    immutable: true
+properties:
+  - !ruby/object:Api::Type::String
+    name: 'name'
+    description: The resource name of deployment resource pool. The maximum length is 63 characters, and valid characters are `/^[a-z]([a-z0-9-]{0,61}[a-z0-9])?$/`.
+    immutable: true
+    required: true
+    custom_flatten: templates/terraform/custom_flatten/name_from_self_link.erb
+  - !ruby/object:Api::Type::NestedObject
+    name: 'dedicatedResources'
+    description: The underlying dedicated resources that the deployment resource pool uses.
+    properties:
+      - !ruby/object:Api::Type::NestedObject
+        name: 'machineSpec'
+        required: true
+        immutable: true
+        description: The specification of a single machine used by the prediction
+        properties:
+          - !ruby/object:Api::Type::String
+            name: machineType
+            description: The type of the machine. See the [list of machine types supported for prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
+            immutable: true
+          - !ruby/object:Api::Type::String
+            name: acceleratorType
+            description:
+              The type of accelerator(s) that may be attached to the
+              machine as per accelerator_count. See possible values
+              [here](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#AcceleratorType).
+          - !ruby/object:Api::Type::Integer
+            name: acceleratorCount
+            description: The number of accelerators to attach to the machine.
+      - !ruby/object:Api::Type::Integer
+        name: minReplicaCount
+        required: true
+        immutable: true
+        description:
+          The minimum number of machine replicas this DeployedModel will
+          be always deployed on. This value must be greater than or equal
+          to 1. If traffic against the DeployedModel increases, it may
+          dynamically be deployed onto more replicas, and as traffic
+          decreases, some of these extra replicas may be freed.
+      - !ruby/object:Api::Type::Integer
+        name: maxReplicaCount
+        immutable: true
+        description:
+          The maximum number of replicas this DeployedModel may be
+          deployed on when the traffic against it increases. If the
+          requested value is too large, the deployment will error, but if
+          deployment succeeds then the ability to scale the model to that
+          many replicas is guaranteed (barring service outages). If
+          traffic against the DeployedModel increases beyond what its
+          replicas at maximum may handle, a portion of the traffic will be
+          dropped. If this value is not provided, will use
+          min_replica_count as the default value. The value of this field
+          impacts the charge against Vertex CPU and GPU quotas.
+          Specifically, you will be charged for max_replica_count * number
+          of cores in the selected machine type) and (max_replica_count *
+          number of GPUs per replica in the selected machine type).
+      - !ruby/object:Api::Type::Array
+        name: autoscalingMetricSpecs
+        immutable: true
+        description: A list of the metric specifications that overrides a resource utilization metric.
+        item_type: !ruby/object:Api::Type::NestedObject
+          name: autoscalingMetricSpecs
+          description:
+            The metric specifications that overrides a resource
+            utilization metric (CPU utilization, accelerator's duty cycle,
+            and so on) target value (default to 60 if not set). At most
+            one entry is allowed per metric. If
+            machine_spec.accelerator_count is above 0, the autoscaling
+            will be based on both CPU utilization and accelerator's duty
+            cycle metrics and scale up when either metrics exceeds its
+            target value while scale down if both metrics are under their
+            target value. The default target value is 60 for both metrics.
+            If machine_spec.accelerator_count is 0, the autoscaling will
+            be based on CPU utilization metric only with default target
+            value 60 if not explicitly set. For example, in the case of
+            Online Prediction, if you want to override target CPU
+            utilization to 80, you should set
+            autoscaling_metric_specs.metric_name to
+            `aiplatform.googleapis.com/prediction/online/cpu/utilization`
+            and autoscaling_metric_specs.target to `80`.
+          properties:
+            - !ruby/object:Api::Type::String
+              name: metricName
+              required: true
+              description:
+                'The resource metric name. Supported metrics: For Online Prediction:
+                * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
+                * `aiplatform.googleapis.com/prediction/online/cpu/utilization`'
+            - !ruby/object:Api::Type::Integer
+              name: target
+              description:
+                The target resource utilization in percentage (1% - 100%)
+                for the given metric; once the real usage deviates from
+                the target by a certain percentage, the machine replicas
+                change. The default value is 60 (representing 60%) if not
+                provided.
+  - !ruby/object:Api::Type::String
+    name: 'createTime'
+    output: true
+    description: A timestamp in RFC3339 UTC "Zulu" format, with nanosecond resolution and up to nine fractional digits.
diff --git a/mmv1/templates/terraform/encoders/wrap_object_with_deployment_resource_pool_id.go.erb b/mmv1/templates/terraform/encoders/wrap_object_with_deployment_resource_pool_id.go.erb
@@ -0,0 +1,21 @@
+<%# The license inside this block applies to this file.
+	# Copyright 2024 Google Inc.
+	# Licensed under the Apache License, Version 2.0 (the "License");
+	# you may not use this file except in compliance with the License.
+	# You may obtain a copy of the License at
+	#
+	#     http://www.apache.org/licenses/LICENSE-2.0
+	#
+	# Unless required by applicable law or agreed to in writing, software
+	# distributed under the License is distributed on an "AS IS" BASIS,
+	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+	# See the License for the specific language governing permissions and
+	# limitations under the License.
+-%>
+	newObj := make(map[string]interface{})
+	newObj["deploymentResourcePool"] = obj
+	nameProp, ok := d.GetOk("name")
+	if ok && nameProp != nil {
+		newObj["deploymentResourcePoolId"] = nameProp
+	}
+	return newObj, nil
diff --git a/mmv1/templates/terraform/examples/vertex_ai_deployment_resource_pool.tf.erb b/mmv1/templates/terraform/examples/vertex_ai_deployment_resource_pool.tf.erb
@@ -0,0 +1,19 @@
+resource "google_vertex_ai_deployment_resource_pool" "<%= ctx[:primary_resource_id] %>" {
+    region = "us-central1"
+    name = "<%= ctx[:vars]['deployment_resource_pool_id'] %>"
+    dedicated_resources {
+        machine_spec {
+            machine_type = "n1-standard-4"
+            accelerator_type = "NVIDIA_TESLA_K80"
+            accelerator_count = 1
+        }
+
+        min_replica_count = 1
+        max_replica_count = 2
+
+        autoscaling_metric_specs {
+            metric_name = "aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle"
+            target = 60
+        }
+    }
+}