diff --git a/kubernetes/EKS/config.properties b/kubernetes/EKS/config.properties index 7dbc411a58..67f9d10cce 100644 --- a/kubernetes/EKS/config.properties +++ b/kubernetes/EKS/config.properties @@ -1,5 +1,6 @@ inference_address=http://0.0.0.0:8080 management_address=http://0.0.0.0:8081 +metrics_address=http://0.0.0.0:8082 NUM_WORKERS=1 number_of_gpu=1 number_of_netty_threads=32 diff --git a/kubernetes/README.md b/kubernetes/README.md index 800c6a1d6e..1b58a15fca 100644 --- a/kubernetes/README.md +++ b/kubernetes/README.md @@ -279,10 +279,12 @@ Follow the link for log aggregation with EFK Stack.\ * Helm is picking up other .yaml files. Make sure you’ve added other files correctly to .helmignore. It should only run with values.yaml. * `kubectl describe pod` shows error message "0/1 nodes are available: 1 Insufficient cpu." * Ensure that the `n_cpu` value in `values.yaml` is set to a number that can be supported by the nodes in the cluster. - + +## Autoscaling + [Autoscaling with torchserve metrics](autoscale.md) + ## Roadmap -* [] Autoscaling * [] Log / Metrics Aggregation using [AWS Container Insights](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights.html) * [] EFK Stack Integration * [] Readiness / Liveness Probes diff --git a/kubernetes/adapter.yaml b/kubernetes/adapter.yaml new file mode 100644 index 0000000000..00025def96 --- /dev/null +++ b/kubernetes/adapter.yaml @@ -0,0 +1,200 @@ +# Default values for k8s-prometheus-adapter.. 
+affinity: {} + +image: + repository: k8s.gcr.io/prometheus-adapter/prometheus-adapter + tag: v0.9.0 + pullPolicy: IfNotPresent + +logLevel: 4 + +metricsRelistInterval: 1m + +listenPort: 6443 + +nodeSelector: {} + +priorityClassName: "" + +# Url to access prometheus +prometheus: + # Value is templated + url: http://prometheus-server.default.svc.cluster.local + port: 80 + path: "" + +replicas: 1 + +# k8s 1.21 needs fsGroup to be set for non root deployments +# ref: https://github.com/kubernetes/kubernetes/issues/70679 +podSecurityContext: + fsGroup: 10001 + +rbac: + # Specifies whether RBAC resources should be created + create: true + +psp: + # Specifies whether PSP resources should be created + create: false + +serviceAccount: + # Specifies whether a service account should be created + create: true + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: + # ServiceAccount annotations. + # Use case: AWS EKS IAM roles for service accounts + # ref: https://docs.aws.amazon.com/eks/latest/userguide/specify-service-account-role.html + annotations: {} + +# Custom DNS configuration to be added to prometheus-adapter pods +dnsConfig: {} +# nameservers: +# - 1.2.3.4 +# searches: +# - ns1.svc.cluster-domain.example +# - my.dns.search.suffix +# options: +# - name: ndots +# value: "2" +# - name: edns0 +resources: {} + # requests: + # cpu: 100m + # memory: 128Mi + # limits: + # cpu: 100m + # memory: 128Mi + +rules: + default: true + custom: [] +# - seriesQuery: '{__name__=~"^some_metric_count$"}' +# resources: +# template: <<.Resource>> +# name: +# matches: "" +# as: "my_custom_metric" +# metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>) + # Mounts a configMap with pre-generated rules for use. 
Overrides the + # default, custom, external and resource entries + existing: + external: + - seriesQuery: '{__name__=~"^ts_queue_latency_microseconds"}' + resources: + overrides: + namespace: + resource: namespace + service: + resource: service + pod: + resource: pod + name: + matches: "^(.*)_microseconds" + as: "ts_queue_latency_microseconds" + metricsQuery: ts_queue_latency_microseconds + resource: {} +# cpu: +# containerQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, container!=""}[3m])) by (<<.GroupBy>>) +# nodeQuery: sum(rate(container_cpu_usage_seconds_total{<<.LabelMatchers>>, id='/'}[3m])) by (<<.GroupBy>>) +# resources: +# overrides: +# node: +# resource: node +# namespace: +# resource: namespace +# pod: +# resource: pod +# containerLabel: container +# memory: +# containerQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>, container!=""}) by (<<.GroupBy>>) +# nodeQuery: sum(container_memory_working_set_bytes{<<.LabelMatchers>>,id='/'}) by (<<.GroupBy>>) +# resources: +# overrides: +# node: +# resource: node +# namespace: +# resource: namespace +# pod: +# resource: pod +# containerLabel: container +# window: 3m + +service: + annotations: {} + port: 443 + type: ClusterIP +# clusterIP: 1.2.3.4 + +tls: + enable: false + ca: |- + # Public CA file that signed the APIService + key: |- + # Private key of the APIService + certificate: |- + # Public key of the APIService + +# Any extra arguments +extraArguments: [] + # - --tls-private-key-file=/etc/tls/tls.key + # - --tls-cert-file=/etc/tls/tls.crt + +# Any extra volumes +extraVolumes: [] + # - name: example-name + # hostPath: + # path: /path/on/host + # type: DirectoryOrCreate + # - name: ssl-certs + # hostPath: + # path: /etc/ssl/certs/ca-bundle.crt + # type: File + +# Any extra volume mounts +extraVolumeMounts: [] + # - name: example-name + # mountPath: /path/in/container + # - name: ssl-certs + # mountPath: /etc/ssl/certs/ca-certificates.crt + # readOnly: true + +tolerations: 
[] + +# Labels added to the pod +podLabels: {} + +# Annotations added to the pod +podAnnotations: {} + +hostNetwork: + # Specifies if prometheus-adapter should be started in hostNetwork mode. + # + # You would require this enabled if you use alternate overlay networking for pods and + # API server unable to communicate with metrics-server. As an example, this is required + # if you use Weave network on EKS. See also dnsPolicy + enabled: false + +# When hostNetwork is enabled, you probably want to set this to ClusterFirstWithHostNet +# dnsPolicy: ClusterFirstWithHostNet + +# Deployment strategy type +strategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 25% + maxSurge: 25% + +podDisruptionBudget: + # Specifies if PodDisruptionBudget should be enabled + # When enabled, minAvailable or maxUnavailable should also be defined. + enabled: false + minAvailable: + maxUnavailable: 1 + +certManager: + enabled: false + caCertDuration: 43800h + certDuration: 8760h diff --git a/kubernetes/autoscale.md b/kubernetes/autoscale.md new file mode 100644 index 0000000000..923be6b1e0 --- /dev/null +++ b/kubernetes/autoscale.md @@ -0,0 +1,170 @@ +# Autoscaler + +Setup Kubernetes HPA (Horizontal Pod Autoscaler) for Torchserve, tuned for torchserve metrics. This uses Prometheus as metrics collector and Prometheus Adapter as metrics server, serving Torchserve metrics for HPA. + +## Steps + +### 1. Install Torchserve with metrics enabled for prometheus format + +[Install TorchServe using Helm Charts](README.md##-Deploy-TorchServe-using-Helm-Charts) +### 2.
Install Prometheus + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +helm install prometheus prometheus-community/prometheus +``` + +The above command outputs prometheus server url: + +```bash +NAME: prometheus +LAST DEPLOYED: Wed Sep 8 19:10:49 2021 +NAMESPACE: default +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +The Prometheus server can be accessed via port 80 on the following DNS name from within your cluster: +prometheus-server.default.svc.cluster.local +... +... +``` + +### 3. Install Prometheus Adapter + +- Update Prometheus url and port in adapter.yaml. Use the url given in prometheus installation output. + +```yaml +# Url to access prometheus +prometheus: + # Value is templated + url: http://prometheus-server.default.svc.cluster.local + port: 80 + path: "" +``` + +- Update external metrics rules in adapter.yaml. Here we are enabling external metrics in prometheus adapter and serving `ts_queue_latency_microseconds` metric. + +```yaml +external: +- seriesQuery: '{__name__=~"^ts_queue_latency_microseconds"}' + resources: + overrides: + namespace: + resource: namespace + service: + resource: service + pod: + resource: pod + name: + matches: "^(.*)_microseconds" + as: "ts_queue_latency_microseconds" + metricsQuery: ts_queue_latency_microseconds +``` + +Refer: [Prometheus Adapter External Metrics](https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/docs/externalmetrics.md) + +- Install Prometheus adapter + +```bash +helm install -f adapter.yaml prometheus-adapter prometheus-community/prometheus-adapter +``` + +The output of the above command is + +``` +NAME: adapter +LAST DEPLOYED: Wed Sep 8 19:49:28 2021 +NAMESPACE: default +STATUS: deployed +REVISION: 1 +TEST SUITE: None +NOTES: +adapter-prometheus-adapter has been deployed.
+In a few minutes you should be able to list metrics using the following command(s): + + kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1 + + kubectl get --raw /apis/external.metrics.k8s.io/v1beta1 +``` + +#### Check External metrics list + +```bash +kubectl get --raw /apis/external.metrics.k8s.io/v1beta1 | jq +``` + +```json +{ + "kind": "APIResourceList", + "apiVersion": "v1", + "groupVersion": "external.metrics.k8s.io/v1beta1", + "resources": [ + { + "name": "ts_queue_latency_microseconds", + "singularName": "", + "namespaced": true, + "kind": "ExternalMetricValueList", + "verbs": [ + "get" + ] + } + ] +} +``` + +### 4. Deploy Horizontal Pod Autoscaler for Torchserve + +- Change `targetValue` as per requirement. + +```yaml +kind: HorizontalPodAutoscaler +apiVersion: autoscaling/v2beta1 +metadata: + name: torchserve +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: torchserve + # autoscale between 1 and 5 replicas + minReplicas: 1 + maxReplicas: 5 + metrics: + - type: External + external: + metricName: ts_queue_latency_microseconds + targetValue: "7000000m" +``` + +```bash +kubectl apply -f hpa.yaml +``` + +### 5. 
Check status of HPA + +```bash +kubectl describe hpa torchserve +``` + +```bash +Name: torchserve +Namespace: default +Labels: +Annotations: +CreationTimestamp: Wed, 08 Sep 2021 20:09:48 +0530 +Reference: Deployment/torchserve +Metrics: ( current / target ) + "ts_queue_latency_microseconds" (target value): 5257630m / 7k +Min replicas: 1 +Max replicas: 5 +Deployment pods: 3 current / 3 desired +Conditions: + Type Status Reason Message + ---- ------ ------ ------- + AbleToScale True ReadyForNewScale recommended size matches current size + ScalingActive True ValidMetricFound the HPA was able to successfully calculate a replica count from external metric ts_queue_latency_microseconds(nil) + ScalingLimited False DesiredWithinRange the desired count is within the acceptable range +Events: +``` \ No newline at end of file diff --git a/kubernetes/hpa.yaml b/kubernetes/hpa.yaml new file mode 100644 index 0000000000..64671eb3c5 --- /dev/null +++ b/kubernetes/hpa.yaml @@ -0,0 +1,17 @@ +kind: HorizontalPodAutoscaler +apiVersion: autoscaling/v2beta1 +metadata: + name: torchserve +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: torchserve + # autoscale between 1 and 5 replicas + minReplicas: 1 + maxReplicas: 5 + metrics: + - type: External + external: + metricName: ts_queue_latency_microseconds + targetValue: "7000000m"