diff --git a/docs/configuration.md b/docs/configuration.md
index fc6442a9f4..e8f3a5a388 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -212,8 +212,6 @@ Set nvidia environment variables. For example:
### Enable metrics api
* `enable_metrics_api` : Enable or disable metric apis i.e. it can be either `true` or `false`. Default: true (Enabled)
-* `metrics_format` : Use this to specify metric report format . At present, the only supported and default value for this is `prometheus`
- This is used in conjunction with `enable_metrics_api` option above.

### Config model
* `models`: Use this to set configurations specific to a model. The value is presented in json format.
diff --git a/docs/metrics.md b/docs/metrics.md
index b865975a69..fc578c097a 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -18,8 +18,9 @@ Frontend metrics include system level metrics. The host resource utilization fro
Torchserve provides an API to collect custom backend metrics. Metrics defined by a custom service or handler code can be collected per request or per a batch of requests. Two metric modes are supported, i.e `log` and `prometheus`. The default mode is `log`.

Metrics mode can be configured using the `metrics_mode` configuration option in `config.properties` or `TS_METRICS_MODE` environment variable.
-In `log` mode, Metrics are logged and can be aggregated by metric agents.
+For further details on `config.properties` and environment variable based configuration, refer to the [Torchserve config](configuration.md) docs.
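+
+For example, to select `prometheus` mode via `config.properties` (an illustrative snippet; the default mode is `log`):
+
+```
+metrics_mode=prometheus
+```
+
+Equivalently, set the `TS_METRICS_MODE=prometheus` environment variable before starting Torchserve.
+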
+In `log` mode, metrics are logged and can be aggregated by metric agents.

Metrics are collected by default at the following locations in `log` mode:

* Frontend metrics - `log_directory/ts_metrics.log`
* Backend metrics - `log_directory/model_metrics.log`

@@ -31,27 +32,27 @@ In `prometheus` mode, all metrics are made available in prometheus format via th

## Frontend Metrics

-| Metric Name                       | Type    | Unit         | Dimensions                        | Semantics                                                                    |
-|-----------------------------------|---------|--------------|-----------------------------------|------------------------------------------------------------------------------|
-| Requests2XX                       | counter | Count        | Level, Hostname                   | Total number of requests with response in 200-300 status code range         |
-| Requests4XX                       | counter | Count        | Level, Hostname                   | Total number of requests with response in 400-500 status code range         |
-| Requests5XX                       | counter | Count        | Level, Hostname                   | Total number of requests with response status code above 500                |
-| ts_inference_requests_total       | counter | Count        | ModelName, ModelVersion, Hostname | Total number of inference requests received                                 |
-| ts_inference_latency_microseconds | counter | Microseconds | ModelName, ModelVersion, Hostname | Total inference latency in Microseconds                                     |
-| ts_queue_latency_microseconds     | counter | Microseconds | ModelName, ModelVersion, Hostname | Total queue latency in Microseconds                                         |
-| QueueTime                         | gauge   | Milliseconds | Level, Hostname                   | Time spent by a job in request queue in Milliseconds                        |
-| WorkerThreadTime                  | gauge   | Milliseconds | Level, Hostname                   | Time spent in worker thread excluding backend response time in Milliseconds |
-| WorkerLoadTime                    | gauge   | Milliseconds | WorkerName, Level, Hostname       | Time taken by worker to load model in Milliseconds                          |
-| CPUUtilization                    | gauge   | Percent      | Level, Hostname                   | CPU utilization on host                                                     |
-| MemoryUsed                        | gauge   | Megabytes    | Level, Hostname                   | Memory used on host                                                         |
-| MemoryAvailable                   | gauge   | Megabytes    | Level, Hostname                   | Memory available on host                                                    |
-| MemoryUtilization                 | gauge   | Percent      | Level, Hostname                   | Memory utilization on host                                                  |
-| DiskUsage                         | gauge   | Gigabytes    | Level, Hostname                   | Disk used on host                                                           |
-| DiskUtilization                   | gauge   | Percent      | Level, Hostname                   | Disk used on host                                                           |
-| DiskAvailable                     | gauge   | Gigabytes    | Level, Hostname                   | Disk available on host                                                      |
-| GPUMemoryUtilization              | gauge   | Percent      | Level, DeviceId, Hostname         | GPU memory utilization on host, DeviceId                                    |
-| GPUMemoryUsed                     | gauge   | Megabytes    | Level, DeviceId, Hostname         | GPU memory used on host, DeviceId                                           |
-| GPUUtilization                    | gauge   | Percent      | Level, DeviceId, Hostname         | GPU utilization on host, DeviceId                                           |
+| Metric Name                       | Type    | Unit         | Dimensions                          | Semantics                                                                    |
+|-----------------------------------|---------|--------------|-------------------------------------|------------------------------------------------------------------------------|
+| Requests2XX                       | counter | Count        | Level, Hostname                     | Total number of requests with response in 200-300 status code range         |
+| Requests4XX                       | counter | Count        | Level, Hostname                     | Total number of requests with response in 400-500 status code range         |
+| Requests5XX                       | counter | Count        | Level, Hostname                     | Total number of requests with response status code above 500                |
+| ts_inference_requests_total       | counter | Count        | model_name, model_version, hostname | Total number of inference requests received                                 |
+| ts_inference_latency_microseconds | counter | Microseconds | model_name, model_version, hostname | Total inference latency in Microseconds                                     |
+| ts_queue_latency_microseconds     | counter | Microseconds | model_name, model_version, hostname | Total queue latency in Microseconds                                         |
+| QueueTime                         | gauge   | Milliseconds | Level, Hostname                     | Time spent by a job in request queue in Milliseconds                        |
+| WorkerThreadTime                  | gauge   | Milliseconds | Level, Hostname                     | Time spent in worker thread excluding backend response time in Milliseconds |
+| WorkerLoadTime                    | gauge   | Milliseconds | WorkerName, Level, Hostname         | Time taken by worker to load model in Milliseconds                          |
+| CPUUtilization                    | gauge   | Percent      | Level, Hostname                     | CPU utilization on host                                                     |
+| MemoryUsed                        | gauge   | Megabytes    | Level, Hostname                     | Memory used on host                                                         |
+| MemoryAvailable                   | gauge   | Megabytes    | Level, Hostname                     | Memory available on host                                                    |
+| MemoryUtilization                 | gauge   | Percent      | Level, Hostname                     | Memory utilization on host                                                  |
+| DiskUsage                         | gauge   | Gigabytes    | Level, Hostname                     | Disk used on host                                                           |
+| DiskUtilization                   | gauge   | Percent      | Level, Hostname                     | Disk utilization on host                                                    |
+| DiskAvailable                     | gauge   | Gigabytes    | Level, Hostname                     | Disk available on host                                                      |
+| GPUMemoryUtilization              | gauge   | Percent      | Level, DeviceId, Hostname           | GPU memory utilization on host, DeviceId                                    |
+| GPUMemoryUsed                     | gauge   | Megabytes    | Level, DeviceId, Hostname           | GPU memory used on host, DeviceId                                           |
+| GPUUtilization                    | gauge   | Percent      | Level, DeviceId, Hostname           | GPU utilization on host, DeviceId                                           |

## Backend Metrics

diff --git a/docs/metrics_api.md b/docs/metrics_api.md
index edbee43a74..c1ca10c2d5 100644
--- a/docs/metrics_api.md
+++ b/docs/metrics_api.md
@@ -1,38 +1,83 @@
# Metrics API

-Metrics API is listening on port 8082 and only accessible from localhost by default. To change the default setting, see [TorchServe Configuration](configuration.md). The default metrics endpoint returns Prometheus formatted metrics. You can query metrics using curl requests or point a [Prometheus Server](#prometheus-server) to the endpoint and use [Grafana](#grafana) for dashboards.
+The Metrics API listens on port 8082 and, by default, is accessible only from localhost. To change the default setting, see [TorchServe Configuration](configuration.md). The default metrics endpoint returns Prometheus formatted metrics when the [metrics_mode](https://github.com/pytorch/serve/blob/master/docs/metrics.md) configuration is set to `prometheus`. You can query metrics using curl requests or point a [Prometheus Server](#prometheus-server) to the endpoint and use [Grafana](#grafana) for dashboards.

-By default these APIs are enable however same can be disabled by setting `enable_metrics_api=false` in torchserve config.properties file.
+These APIs are enabled by default, but can be disabled by setting `enable_metrics_api=false` in the TorchServe `config.properties` file. For details, refer to the [Torchserve config](configuration.md) docs.
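+
+For example, a `config.properties` sketch that keeps the metrics API enabled and serves metrics in `prometheus` mode (illustrative values, taken from the Kubernetes sample configs):
+
+```
+metrics_address=http://0.0.0.0:8082
+enable_metrics_api=true
+metrics_mode=prometheus
+```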

```console
curl http://127.0.0.1:8082/metrics

-# HELP ts_inference_latency_microseconds Cumulative inference duration in microseconds
-# TYPE ts_inference_latency_microseconds counter
-ts_inference_latency_microseconds{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noopversioned",model_version="1.11",} 1990.348
-ts_inference_latency_microseconds{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noop",model_version="default",} 2032.411
-# HELP ts_inference_requests_total Total number of inference requests.
+# HELP Requests5XX Torchserve prometheus counter metric with unit: Count
+# TYPE Requests5XX counter
+# HELP DiskUsage Torchserve prometheus gauge metric with unit: Gigabytes
+# TYPE DiskUsage gauge
+DiskUsage{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 20.054508209228516
+# HELP GPUUtilization Torchserve prometheus gauge metric with unit: Percent
+# TYPE GPUUtilization gauge
+# HELP PredictionTime Torchserve prometheus gauge metric with unit: ms
+# TYPE PredictionTime gauge
+PredictionTime{ModelName="resnet18",Level="Model",Hostname="88665a372f4b.ant.amazon.com",} 83.13
+# HELP WorkerLoadTime Torchserve prometheus gauge metric with unit: Milliseconds
+# TYPE WorkerLoadTime gauge
+WorkerLoadTime{WorkerName="W-9000-resnet18_1.0",Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 4593.0
+WorkerLoadTime{WorkerName="W-9001-resnet18_1.0",Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 4592.0
+# HELP MemoryAvailable Torchserve prometheus gauge metric with unit: Megabytes
+# TYPE MemoryAvailable gauge
+MemoryAvailable{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 5829.7421875
+# HELP GPUMemoryUsed Torchserve prometheus gauge metric with unit: Megabytes
+# TYPE GPUMemoryUsed gauge
+# HELP ts_inference_requests_total Torchserve prometheus counter metric with unit: Count
# TYPE ts_inference_requests_total counter
-ts_inference_requests_total{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noopversioned",model_version="1.11",} 1.0
-ts_inference_requests_total{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noop",model_version="default",} 1.0
-# HELP ts_queue_latency_microseconds Cumulative queue duration in microseconds
+ts_inference_requests_total{model_name="resnet18",model_version="default",hostname="88665a372f4b.ant.amazon.com",} 3.0
+# HELP GPUMemoryUtilization Torchserve prometheus gauge metric with unit: Percent
+# TYPE GPUMemoryUtilization gauge
+# HELP HandlerTime Torchserve prometheus gauge metric with unit: ms
+# TYPE HandlerTime gauge
+HandlerTime{ModelName="resnet18",Level="Model",Hostname="88665a372f4b.ant.amazon.com",} 82.93
+# HELP ts_inference_latency_microseconds Torchserve prometheus counter metric with unit: Microseconds
+# TYPE ts_inference_latency_microseconds counter
+ts_inference_latency_microseconds{model_name="resnet18",model_version="default",hostname="88665a372f4b.ant.amazon.com",} 290371.129
+# HELP CPUUtilization Torchserve prometheus gauge metric with unit: Percent
+# TYPE CPUUtilization gauge
+CPUUtilization{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 0.0
+# HELP MemoryUsed Torchserve prometheus gauge metric with unit: Megabytes
+# TYPE MemoryUsed gauge
+MemoryUsed{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 8245.62109375
+# HELP QueueTime Torchserve prometheus gauge metric with unit: Milliseconds
+# TYPE QueueTime gauge
+QueueTime{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 0.0
+# HELP ts_queue_latency_microseconds Torchserve prometheus counter metric with unit: Microseconds
# TYPE ts_queue_latency_microseconds counter
-ts_queue_latency_microseconds{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noopversioned",model_version="1.11",} 364.884
-ts_queue_latency_microseconds{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noop",model_version="default",} 82.349
+ts_queue_latency_microseconds{model_name="resnet18",model_version="default",hostname="88665a372f4b.ant.amazon.com",} 365.21
+# HELP DiskUtilization Torchserve prometheus gauge metric with unit: Percent
+# TYPE DiskUtilization gauge
+DiskUtilization{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 5.8
+# HELP Requests2XX Torchserve prometheus counter metric with unit: Count
+# TYPE Requests2XX counter
+Requests2XX{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 8.0
+# HELP Requests4XX Torchserve prometheus counter metric with unit: Count
+# TYPE Requests4XX counter
+# HELP WorkerThreadTime Torchserve prometheus gauge metric with unit: Milliseconds
+# TYPE WorkerThreadTime gauge
+WorkerThreadTime{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 1.0
+# HELP DiskAvailable Torchserve prometheus gauge metric with unit: Gigabytes
+# TYPE DiskAvailable gauge
+DiskAvailable{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 325.05113983154297
+# HELP MemoryUtilization Torchserve prometheus gauge metric with unit: Percent
+# TYPE MemoryUtilization gauge
+MemoryUtilization{Level="Host",Hostname="88665a372f4b.ant.amazon.com",} 64.4
```

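+Specific metrics can also be queried by name, by passing one or more `name[]` query parameters, as in the following example:
+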
```console
curl "http://127.0.0.1:8082/metrics?name[]=ts_inference_latency_microseconds&name[]=ts_queue_latency_microseconds" --globoff

-# HELP ts_inference_latency_microseconds Cumulative inference duration in microseconds
-# TYPE ts_inference_latency_microseconds counter
-ts_inference_latency_microseconds{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noopversioned",model_version="1.11",} 1990.348
-ts_inference_latency_microseconds{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noop",model_version="default",} 2032.411
-# HELP ts_queue_latency_microseconds Cumulative queue duration in microseconds
+# HELP ts_queue_latency_microseconds Torchserve prometheus counter metric with unit: Microseconds
# TYPE ts_queue_latency_microseconds counter
-ts_queue_latency_microseconds{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noopversioned",model_version="1.11",} 364.884
-ts_queue_latency_microseconds{uuid="d5f84dfb-fae8-4f92-b217-2f385ca7470b",model_name="noop",model_version="default",} 82.349
+ts_queue_latency_microseconds{model_name="resnet18",model_version="default",hostname="88665a372f4b.ant.amazon.com",} 365.21
+# HELP ts_inference_latency_microseconds Torchserve prometheus counter metric with unit: Microseconds
+# TYPE ts_inference_latency_microseconds counter
+ts_inference_latency_microseconds{model_name="resnet18",model_version="default",hostname="88665a372f4b.ant.amazon.com",} 290371.129
```

#### Prometheus server
@@ -52,15 +97,15 @@ scrape_configs:
  static_configs:
    - targets: ['localhost:8082'] #TorchServe metrics endpoint
```
-Navigate to `http://localhost:9090/` on a browser to execute queries and create graphs
+Navigate to `http://localhost:9090/` in a browser to execute queries and create graphs.

-PrometheusServer
+Prometheus Server

#### Grafana

Once you have the Torchserve and Prometheus servers running, you can further [setup](https://prometheus.io/docs/visualization/grafana/) Grafana, point it to Prometheus server and navigate to `http://localhost:3000/` to create dashboards and graphs.

-You can use command given below to start Grafana -
+You can use the command given below to start Grafana:
`sudo systemctl daemon-reload && sudo systemctl enable grafana-server && sudo systemctl start grafana-server`

-Screen Shot 2020-07-08 at 5 51 57 PM
+Grafana Dashboard
diff --git a/examples/cloudformation/README.md b/examples/cloudformation/README.md
index ac460d7309..1150bdc427 100644
--- a/examples/cloudformation/README.md
+++ b/examples/cloudformation/README.md
@@ -66,13 +66,13 @@ aws cloudformation create-stack \
> curl --insecure "/metrics"
# HELP ts_queue_latency_microseconds Cumulative queue duration in microseconds
# TYPE ts_queue_latency_microseconds counter
-ts_queue_latency_microseconds{uuid="e275b494-3d54-45bd-a640-abca741a070b",model_name="squeezenet1_1",model_version="default",} 364.07800000000003
+ts_queue_latency_microseconds{model_name="squeezenet1_1",model_version="default",hostname="test_host",} 364.07800000000003
# HELP ts_inference_latency_microseconds Cumulative inference duration in microseconds
# TYPE ts_inference_latency_microseconds counter
-ts_inference_latency_microseconds{uuid="e275b494-3d54-45bd-a640-abca741a070b",model_name="squeezenet1_1",model_version="default",} 128010.02100000001
+ts_inference_latency_microseconds{model_name="squeezenet1_1",model_version="default",hostname="test_host",} 128010.02100000001
# HELP ts_inference_requests_total Total number of inference requests.
# TYPE ts_inference_requests_total counter
-ts_inference_requests_total{uuid="e275b494-3d54-45bd-a640-abca741a070b",model_name="squeezenet1_1",model_version="default",} 4.0
+ts_inference_requests_total{model_name="squeezenet1_1",model_version="default",hostname="test_host",} 4.0
```

@@ -97,7 +97,7 @@ aws cloudformation create-stack \
    ParameterKey=ModelPath,ParameterValue=
```

-e.g. 
+For example:
``` aws cloudformation create-stack \ --stack-name torchserve \ @@ -149,13 +149,13 @@ aws cloudformation create-stack \ > curl "/metrics" # HELP ts_queue_latency_microseconds Cumulative queue duration in microseconds # TYPE ts_queue_latency_microseconds counter -ts_queue_latency_microseconds{uuid="2b3a4b5b-5131-413a-a725-2abcae5d55ab",model_name="squeezenet1_1",model_version="default",} 932.164 +ts_queue_latency_microseconds{model_name="squeezenet1_1",model_version="default",hostname="test_host",} 932.164 # HELP ts_inference_latency_microseconds Cumulative inference duration in microseconds # TYPE ts_inference_latency_microseconds counter -ts_inference_latency_microseconds{uuid="2b3a4b5b-5131-413a-a725-2abcae5d55ab",model_name="squeezenet1_1",model_version="default",} 411702.625 +ts_inference_latency_microseconds{model_name="squeezenet1_1",model_version="default",hostname="test_host",} 411702.625 # HELP ts_inference_requests_total Total number of inference requests. # TYPE ts_inference_requests_total counter -ts_inference_requests_total{uuid="2b3a4b5b-5131-413a-a725-2abcae5d55ab",model_name="squeezenet1_1",model_version="default",} 9.0 +ts_inference_requests_total{model_name="squeezenet1_1",model_version="default",hostname="test_host",} 9.0 ``` ## CloudWatch Logging diff --git a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java index 03cee69f95..115a021788 100644 --- a/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java +++ b/frontend/server/src/main/java/org/pytorch/serve/util/ConfigManager.java @@ -94,7 +94,6 @@ public final class ConfigManager { private static final String TS_PREFER_DIRECT_BUFFER = "prefer_direct_buffer"; private static final String TS_ALLOWED_URLS = "allowed_urls"; private static final String TS_INSTALL_PY_DEP_PER_MODEL = "install_py_dep_per_model"; - private static final String TS_METRICS_FORMAT = "metrics_format"; private static final String TS_ENABLE_METRICS_API = "enable_metrics_api"; private static final String TS_GRPC_INFERENCE_PORT = "grpc_inference_port"; private static final String TS_GRPC_MANAGEMENT_PORT = "grpc_management_port"; @@ -340,10 +339,6 @@ public boolean getInstallPyDepPerModel() { return Boolean.parseBoolean(getProperty(TS_INSTALL_PY_DEP_PER_MODEL, "false")); } - public String getMetricsFormat() { - return getProperty(TS_METRICS_FORMAT, METRIC_FORMAT_PROMETHEUS); - } - public boolean isMetricApiEnable() { return Boolean.parseBoolean(getProperty(TS_ENABLE_METRICS_API, "true")); } @@ -664,8 +659,6 @@ public String dumpConfigurations() { + getAllowedUrls() + "\nCustom python dependency for model allowed: " + prop.getProperty(TS_INSTALL_PY_DEP_PER_MODEL, "false") - + "\nMetrics report format: " - + prop.getProperty(TS_METRICS_FORMAT, METRIC_FORMAT_PROMETHEUS) + "\nEnable metrics API: " + prop.getProperty(TS_ENABLE_METRICS_API, "true") + "\nMetrics mode: " diff --git a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java index 95e1fbc804..e4be893019 100644 --- a/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java +++ b/frontend/server/src/test/java/org/pytorch/serve/TestUtils.java @@ -50,9 +50,9 @@ public final class TestUtils { private static Channel metricsChannel; private static String tsInferLatencyPattern = "ts_inference_latency_microseconds\\{" - + "ModelName=\"%s\"," - + "ModelVersion=\"%s\"," - + "Hostname=\".+\",\\}\\s\\d+(\\.\\d+)"; + + 
"model_name=\"%s\"," + + "model_version=\"%s\"," + + "hostname=\".+\",\\}\\s\\d+(\\.\\d+)"; private TestUtils() {} diff --git a/kubernetes/AKS/config.properties b/kubernetes/AKS/config.properties index 9f7ad861a7..e6003a92c8 100644 --- a/kubernetes/AKS/config.properties +++ b/kubernetes/AKS/config.properties @@ -2,7 +2,7 @@ inference_address=http://0.0.0.0:8080 management_address=http://0.0.0.0:8081 metrics_address=http://0.0.0.0:8082 enable_metrics_api=true -metrics_format=prometheus +metrics_mode=prometheus NUM_WORKERS=1 number_of_gpu=1 number_of_netty_threads=32 diff --git a/kubernetes/kserve/README.md b/kubernetes/kserve/README.md index d040ed7d63..c35cd2cabf 100644 --- a/kubernetes/kserve/README.md +++ b/kubernetes/kserve/README.md @@ -117,7 +117,7 @@ grpc_management_port=7071 enable_envvars_config=true install_py_dep_per_model=true enable_metrics_api=true -metrics_format=prometheus +metrics_mode=prometheus NUM_WORKERS=1 number_of_netty_threads=4 job_queue_size=10 @@ -189,7 +189,7 @@ Refer link for more [examples](https://github.com/kserve/kserve/tree/master/docs KServe supports different types of inputs (ex: tensor, bytes). Use the following instructions to generate input files based on its type. -[MNIST input generation](kf_request_json/v2/mnist/README.md##-Preparing-input) +[MNIST input generation](kf_request_json/v2/mnist/README.md##-Preparing-input) [Bert input generation](kf_request_json/v2/bert/README.md##-Preparing-input) @@ -233,7 +233,7 @@ Refer the individual readmes for KServe : * [BERT](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/custom/torchserve/bert-sample/hugging-face-bert-sample.md) * [MNIST](https://github.com/kserve/kserve/blob/master/docs/samples/v1beta1/torchserve/README.md) -Sample input JSON file for v1 and v2 protocols +Sample input JSON file for v1 and v2 protocols For v1 protocol diff --git a/kubernetes/kserve/config.properties b/kubernetes/kserve/config.properties index 7c9c33589b..422e53d138 100644 --- a/kubernetes/kserve/config.properties +++ b/kubernetes/kserve/config.properties @@ -7,7 +7,7 @@ grpc_management_port=7071 enable_envvars_config=true install_py_dep_per_model=true enable_metrics_api=true -metrics_format=prometheus +metrics_mode=prometheus NUM_WORKERS=1 number_of_netty_threads=4 job_queue_size=10 diff --git a/kubernetes/kserve/developer_guide.md b/kubernetes/kserve/developer_guide.md index 1692db53ec..5a3281ebc6 100644 --- a/kubernetes/kserve/developer_guide.md +++ b/kubernetes/kserve/developer_guide.md @@ -33,7 +33,7 @@ grpc_management_port=7071 enable_envvars_config=true install_py_dep_per_model=true enable_metrics_api=true -metrics_format=prometheus +metrics_mode=prometheus NUM_WORKERS=1 number_of_netty_threads=4 job_queue_size=10 diff --git a/kubernetes/kserve/image_transformer/README.md b/kubernetes/kserve/image_transformer/README.md index 05d3a2d3be..733d71aee3 100644 --- a/kubernetes/kserve/image_transformer/README.md +++ b/kubernetes/kserve/image_transformer/README.md @@ -86,7 +86,7 @@ inference_address=http://0.0.0.0:8085 management_address=http://0.0.0.0:8085 metrics_address=http://0.0.0.0:8082 enable_metrics_api=true -metrics_format=prometheus +metrics_mode=prometheus NUM_WORKERS=1 number_of_netty_threads=4 job_queue_size=10 diff --git a/kubernetes/kserve/kf_request_json/v2/bert/README.md b/kubernetes/kserve/kf_request_json/v2/bert/README.md index 8f157f57ec..333a2cdcc7 100644 --- a/kubernetes/kserve/kf_request_json/v2/bert/README.md +++ b/kubernetes/kserve/kf_request_json/v2/bert/README.md @@ -16,9 +16,9 @@ 
Run the following command to download the model
```
python Download_Transformer_models.py
```
- 
+
### Generate mar file
- 
+
```bash
torch-model-archiver --model-name BERTSeqClassification --version 1.0 \
--serialized-file Transformer_model/pytorch_model.bin \
@@ -33,7 +33,7 @@ Move the mar file to model-store
```
sudo mv BERTSeqClassification.mar /mnt/models/model-store
```
- 
+
and use the following config properties (`/mnt/models/config`)

```
@@ -44,7 +44,7 @@ enable_envvars_config=true
install_py_dep_per_model=true
enable_metrics_api=true
service_envelope=kservev2
-metrics_format=prometheus
+metrics_mode=prometheus
NUM_WORKERS=1
number_of_netty_threads=4
job_queue_size=10
@@ -58,7 +58,7 @@ Use [bert_bytes_v2.json](bert_bytes_v2.json) or [bert_tensor_v2](bert_tensor_v2.

For new sample text, follow the instructions below

-For bytes input, use [tobytes](tobytes.py) utility. 
+For bytes input, use the [tobytes](tobytes.py) utility.

```
python tobytes.py --input_text "this year business is good"
```
@@ -118,4 +118,4 @@ curl -v -H "ContentType: application/json" http://localhost:8080/v2/models/BERTS
Expected output
```bash
{"id": "33abc661-7265-42fc-b7d9-44e5f79a7a67", "model_name": "BERTSeqClassification", "model_version": "1.0", "outputs": [{"name": "predict", "shape": [], "datatype": "BYTES", "data": ["Not Accepted"]}]}
-```
\ No newline at end of file
+```
diff --git a/kubernetes/kserve/kf_request_json/v2/mnist/README.md b/kubernetes/kserve/kf_request_json/v2/mnist/README.md
index 9fa03a3a3e..f8d41eb552 100644
--- a/kubernetes/kserve/kf_request_json/v2/mnist/README.md
+++ b/kubernetes/kserve/kf_request_json/v2/mnist/README.md
@@ -7,7 +7,7 @@ model locally using kserve.

Clone [pytorch/serve](https://github.com/pytorch/serve) repository navigate to `examples/image_classifier/mnist`
- 
+
```bash
torch-model-archiver --model-name mnist --version 1.0 \
--model-file mnist.py \
@@ -17,7 +17,7 @@ torch-model-archiver --model-name mnist --version 1.0 \

The command will create `mnist.mar` file in current directory

-Move the mar file to model-store 
+Move the mar file to model-store

```
sudo mv mnist.mar /mnt/models/model-store
@@ -33,7 +33,7 @@ enable_envvars_config=true
install_py_dep_per_model=true
enable_metrics_api=true
service_envelope=kservev2
-metrics_format=prometheus
+metrics_mode=prometheus
NUM_WORKERS=1
number_of_netty_threads=4
job_queue_size=10
@@ -49,7 +49,7 @@ For generating input for a new image follow the instructions given below

Move to `kubernetes/kserve/kf_request_json/v2/mnist`

-For bytes input, use [tobytes](tobytes.py) utility. 
+For bytes input, use the [tobytes](tobytes.py) utility.
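+For example, for the sample image `0.png`: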
``` python tobytes.py 0.png @@ -121,4 +121,4 @@ curl -v -H "ContentType: application/json" http://localhost:8080/v2/models/mnist Expected output ```bash {"id": "d3b15cad-50a2-4eaf-80ce-8b0a428bd298", "model_name": "mnist", "model_version": "1.0", "outputs": [{"name": "explain", "shape": [1, 28, 28], "datatype": "FP64", "data": [-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0040547529196303285, -0.000226128774499257, -0.00012734138382422276, 0.005648369544853077, 0.0089047843954152, 0.002638536593970295, 0.002680245911942565, -0.0026578015819202173, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00024465771891337887, 0.0008218450954311162, 0.01528591767842519, 0.007512832335428859, 0.00709498458333515, 0.0034056686436576803, -0.002091925041823873, -0.0007800293875604465, 0.02299587827540853, 0.019004329367380418, -0.0012529559050418735, -0.0014666116646934577, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.005298396405518712, -0.0007901605729004231, 0.0039060659926479398, 0.023174082126728335, 0.01723791770922474, 0.010867034167828598, 0.003001563229273835, 0.00622421771715703, 0.006120712207087491, 0.01673632965122119, 0.005674718948781803, 0.004344134599735745, -0.0012328422311881568, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, 0.0006867353833785289, 0.009772899792600862, -0.0038754932221901437, 0.001798693579973005, 0.001307544047675232, -0.0024510981010352315, -0.0008806773488194292, -0.0, -0.0, -0.00014277890760828639, -0.009322313235257151, 0.020608317727589167, 0.004351394518148479, -0.0007875566214137449, -0.0009075897508410689, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.00022247238084657642, -0.0007829029819622099, 0.0026663695200516055, 0.0009733366691924418, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, 0.0004323207980879993, 0.023657171939959983, 0.01069484496100618, -0.0023759529165659743, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.002074797197335781, -0.002320101263777886, -0.001289920656543141, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.007629679763806616, 0.01044862710854819, 0.00025032875474040415, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0003770836745884539, -0.005156369309364184, 0.0012477582083019567, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -4.442513564501309e-05, 0.010248046436803096, 0.0009971133914441863, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, -0.0, 0.0004501048922351147, -0.00196305355861066, -0.0006664792277975681, 0.0020157403871024866, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.002214456978582924, 0.008361583668963536, 0.0031401942747203444, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0028943545250037983, -0.0031301382844878753, 
0.002113252994616467, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0010321050071136991, 0.008905753948020954, 0.0028464383724280478, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0053052889804602885, -0.0019271100770928186, 0.0012090042664300153, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0011945155805738324, 0.005654442809865844, 0.0020132075147173286, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0014689358119857122, 0.0010743412654248086, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0017047980433136346, 0.0029066051664685937, -0.0007805868937027288, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 5.541726090138969e-05, 0.0014516115182299915, 0.0002827700518397855, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.001440140782635336, 0.002381249982038837, 0.002146825452068144, -0.0, -0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.001150052970321427, 0.0002865015237050364, 0.0029798150346815985, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.001775029606380323, 0.000833985914685474, -0.003770739075457816, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0006093176893524411, -0.00046905781658387527, 0.0034053217440919658, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0007450012183962096, 0.001298767353118675, -0.008499247802184222, -6.145165255574976e-05, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0011809726462884672, -0.0018384763902449712, 0.005411106715800028, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.0021392341817010304, 0.0003259163122540385, -0.005276118905978749, -0.0019509840184772497, -9.545685077687876e-07, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0007772404694664217, -0.0001517954537059768, 0.006481484678129392, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 8.098064554131295e-05, -0.0024904264199929506, -0.0020718618328775897, -5.3411287747038166e-05, -0.0004556472202791715, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0022750984867578, 0.001716405971437602, 0.0003221344811922982, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0015560282437342534, 9.107229584202956e-05, 0.0008772841867241755, 0.0006502979194500701, -0.004128780661881036, 0.0006030386196211547, 0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0013959959731925453, 0.0026791526421029673, 0.002399500793142178, -0.00044960969955281656, 0.003101832495190209, 0.007494535809079955, 0.002864118744003058, -0.003052590549800204, 0.003420222341277871, 0.0014924017873988514, -0.0009357389226494119, 0.0007856229438140384, -0.001843397373255761, 1.6031851430693252e-05, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, -0.000699901824825285, 0.0043822508549258565, -0.003541931476855951, -0.0028896746311921715, -0.0004873454583246359, -0.006087345141728267, 0.000388224886755815, 0.002533641621974457, -0.004352836429303485, -0.0006079421449756437, -0.003810133409713042, -0.0008284413779488711, 0.0, -0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0010901530854686326, -0.013135007707490608, 0.0004734520308098294, 0.0020504232707536456, -0.006609452262924153, 0.0023647861306777536, 0.004678920703192049, -0.0018122526857900652, 0.0021375383049022263, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, -0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}]} -``` \ No newline at end of file +``` diff --git a/kubernetes/kserve/kserve_wrapper/README.md b/kubernetes/kserve/kserve_wrapper/README.md index f235de00b7..54837b945d 100644 --- a/kubernetes/kserve/kserve_wrapper/README.md +++ b/kubernetes/kserve/kserve_wrapper/README.md @@ -77,7 +77,7 @@ grpc_management_port=7071 enable_envvars_config=true install_py_dep_per_model=true enable_metrics_api=true -metrics_format=prometheus +metrics_mode=prometheus NUM_WORKERS=1 number_of_netty_threads=4 job_queue_size=10 @@ -207,7 +207,7 @@ grpc_management_port=7071 enable_envvars_config=true install_py_dep_per_model=true enable_metrics_api=true -metrics_format=prometheus +metrics_mode=prometheus NUM_WORKERS=1 number_of_netty_threads=4 job_queue_size=10 diff --git a/test/pytest/test_metrics.py b/test/pytest/test_metrics.py index 71059f8398..27547cf471 100644 --- a/test/pytest/test_metrics.py +++ b/test/pytest/test_metrics.py @@ -439,9 +439,9 @@ def test_metrics_prometheus_mode(): prometheus_metric_patterns = [ r'Requests2XX\{Level="Host",Hostname=".+",\} \d+\.\d+', - r'ts_inference_requests_total\{ModelName="densenet161",ModelVersion="default",Hostname=".+",\} \d+\.\d+', - r'ts_inference_latency_microseconds\{ModelName="densenet161",ModelVersion="default",Hostname=".+",\} \d+\.\d+', - r'ts_queue_latency_microseconds\{ModelName="densenet161",ModelVersion="default",Hostname=".+",\} \d+\.\d+', + r'ts_inference_requests_total\{model_name="densenet161",model_version="default",hostname=".+",\} \d+\.\d+', + r'ts_inference_latency_microseconds\{model_name="densenet161",model_version="default",hostname=".+",\} \d+\.\d+', + r'ts_queue_latency_microseconds\{model_name="densenet161",model_version="default",hostname=".+",\} \d+\.\d+', r'QueueTime\{Level="Host",Hostname=".+",\} \d+\.\d+', r'WorkerThreadTime\{Level="Host",Hostname=".+",\} \d+\.\d+', r'WorkerLoadTime\{WorkerName=".+",Level="Host",Hostname=".+",\} \d+\.\d+', diff --git a/ts/configs/metrics.yaml b/ts/configs/metrics.yaml index a9e7e2ff1f..696ddb85e1 100644 --- a/ts/configs/metrics.yaml +++ b/ts/configs/metrics.yaml @@ -1,6 +1,5 @@ dimensions: - &model_name "ModelName" - - &model_version "ModelVersion" - &worker_name "WorkerName" - &level "Level" - &device_id "DeviceId" @@ -19,13 +18,13 @@ ts_metrics: dimensions: [*level, *hostname] - name: ts_inference_requests_total unit: Count - dimensions: [*model_name, *model_version, *hostname] + dimensions: ["model_name", "model_version", "hostname"] - name: ts_inference_latency_microseconds unit: Microseconds - dimensions: [*model_name, *model_version, *hostname] + dimensions: ["model_name", "model_version", "hostname"] - name: ts_queue_latency_microseconds unit: Microseconds - dimensions: [*model_name, *model_version, *hostname] + dimensions: ["model_name", "model_version", "hostname"] gauge: - name: QueueTime unit: Milliseconds