
Commit

Fix template render issue and update docs
Signed-off-by: Yi Chen <github@chenyicn.net>
ChenYi015 committed Mar 27, 2024
1 parent a135fa6 commit 6c895a9
Showing 14 changed files with 33 additions and 42 deletions.
10 changes: 5 additions & 5 deletions charts/modeljob/templates/job.yaml
@@ -158,12 +158,12 @@ spec:
{{- if gt (int $gpuCount) 0}}
nvidia.com/gpu: {{ .Values.gpuCount }}
{{- end }}
{{ - if gt (int $gpuMemory) 0 }}
aliyun.com/gpu-mem: { { .Values.gpuMemory }}
{{ - end } }
{{ - if gt (int $gpuCore) 0 }}
{{- if gt (int $gpuMemory) 0 }}
aliyun.com/gpu-mem: {{ .Values.gpuMemory }}
{{- end }}
{{- if gt (int $gpuCore) 0 }}
aliyun.com/gpu-core.percentage: {{ .Values.gpuCore }}
{{ - end }}
{{- end }}
volumeMounts:
{{- if .Values.dataset }}
{{- range $pvcName, $mntPath := .Values.dataset}}
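For context on the template fixes above: in Go's text/template (which Helm uses to render charts), the whitespace-trim marker only takes effect when the dash sits flush against the braces (`{{-` / `-}}`); `{{ - if ... }}` with a space before the dash is a parse error, and `{ { ... } }` with a space between the braces is not an action at all, so it is emitted literally. A minimal, self-contained Go sketch of both behaviors (the `gpuMemory` value is a hypothetical stand-in for the chart's `.Values.gpuMemory`):

```go
package main

import (
	"fmt"
	"os"
	"text/template"
)

func main() {
	// Hypothetical data standing in for the chart's .Values.gpuMemory.
	data := map[string]int{"gpuMemory": 4}

	// "{{-" with the dash flush against the braces is the whitespace-trim
	// marker, so this parses and renders as expected.
	good := "limits:\n" +
		"  {{- if gt .gpuMemory 0 }}\n" +
		"  aliyun.com/gpu-mem: {{ .gpuMemory }}\n" +
		"  {{- end }}\n"
	if err := template.Must(template.New("good").Parse(good)).Execute(os.Stdout, data); err != nil {
		panic(err)
	}

	// "{{ - if ... }}" (space before the dash) is not a trim marker; the parser
	// rejects the stray "-", which is the render failure fixed in this commit.
	if _, err := template.New("bad").Parse("{{ - if gt .gpuMemory 0 }}{{ - end }}"); err != nil {
		fmt.Println("parse error:", err)
	}

	// "{ { .gpuMemory } }" contains no "{{" delimiter, so it is not an action;
	// it would be copied into the output verbatim (the literal-label bug in
	// the ingress templates below).
}
```

Running the sketch prints the rendered limits block, then the parse error produced by the broken form.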
4 changes: 2 additions & 2 deletions charts/pytorchjob/templates/ingress.yaml
@@ -13,8 +13,8 @@ metadata:
createdBy: "PyTorchJob"
controller-name: pytorch-operator
group-name: kubeflow.org
job-name: { { .Release.Name } }
pytorch-job-name: { { .Release.Name } }
job-name: {{ .Release.Name }}
pytorch-job-name: {{ .Release.Name }}
spec:
rules:
- http:
8 changes: 4 additions & 4 deletions charts/seldon-core/templates/seldondeployment.yaml
@@ -41,12 +41,12 @@ spec:
{{- if gt (int $gpuCount) 0 }}
nvidia.com/gpu: {{ .Values.gpuCount }}
{{- end }}
{{ - if gt (int $gpuMemory) 0 }}
{{- if gt (int $gpuMemory) 0 }}
aliyun.com/gpu-mem: {{ .Values.gpuMemory }}
{{ - end } }
{{ - if gt (int $gpuCore) 0 }}
{{- end }}
{{- if gt (int $gpuCore) 0 }}
aliyun.com/gpu-core.percentage: {{ .Values.gpuCore }}
{{ - end }}
{{- end }}
graph:
implementation: {{ .Values.implementation }}
modelUri: {{ .Values.modelUri }}
2 changes: 1 addition & 1 deletion charts/tfjob/templates/ingress.yaml
@@ -13,7 +13,7 @@ metadata:
role: tensorboard
createdBy: "TFJob"
group-name: kubeflow.org
tf-job-name: { { .Release.Name } }
tf-job-name: {{ .Release.Name }}
spec:
rules:
- http:
4 changes: 2 additions & 2 deletions charts/trtserving/templates/deployment.yaml
@@ -104,9 +104,9 @@ spec:
{{- if gt (int $gpuMemory) 0}}
aliyun.com/gpu-mem: {{ .Values.gpuMemory }}
{{- end }}
{{ - if gt (int $gpuCore) 0 }}
{{- if gt (int $gpuCore) 0 }}
aliyun.com/gpu-core.percentage: {{ .Values.gpuCore }}
{{ - end }}
{{- end }}
volumeMounts:
{{- if .Values.shareMemory }}
- mountPath: /dev/shm
@@ -65,7 +65,7 @@ Then give a profile configuration file named config.json like below.
3\. Submit a model benchmark job.

```shell
$ arena model benchmark \
$ arena analyze benchmark \
--name=resnet18-benchmark \
--namespace=default \
--image=registry.cn-beijing.aliyuncs.com/kube-ai/easy-inference:1.0.0 \
@@ -80,13 +80,13 @@ $ arena model benchmark \

job.batch/resnet18-benchmark created
INFO[0000] The model benchmark job resnet18-benchmark has been submitted successfully
INFO[0000] You can run `arena model get resnet18-benchmark` to check the job status
INFO[0000] You can run `arena analyze get resnet18-benchmark` to check the job status
```

4\. List all the model benchmark jobs.

```shell
$ arena model list
$ arena analyze list

NAMESPACE NAME STATUS TYPE DURATION AGE GPU(Requested)
default resnet18-benchmark RUNNING Benchmark 23s 23s 1
@@ -95,7 +95,7 @@ default resnet18-benchmark RUNNING Benchmark 23s 23s 1
5\. Get model benchmark job detail info.

```shell
$ arena model get resnet18-benchmark
$ arena analyze get resnet18-benchmark
Name: resnet18-benchmark
Namespace: default
Type: Benchmark
@@ -125,7 +125,3 @@ Benchmark finished, cost 60.00157570838928 s
Benchmark result:
{"p90_latency": 3.806, "p95_latency": 3.924, "p99_latency": 4.781, "min_latency": 3.665, "max_latency": 1555.418, "mean_latency": 3.88, "median_latency": 3.731, "throughput": 257, "gpu_mem_used": 1.47, "gpu_utilization": 38.39514839785918}
```
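If you want to post-process these metrics rather than read them off the log, the final JSON line can be decoded directly. A minimal Go sketch (struct fields taken from the sample output above, which may not be the complete schema; latencies assumed to be in milliseconds):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// benchmarkResult mirrors the JSON printed at the end of the benchmark log.
type benchmarkResult struct {
	P90Latency     float64 `json:"p90_latency"`
	P95Latency     float64 `json:"p95_latency"`
	P99Latency     float64 `json:"p99_latency"`
	MinLatency     float64 `json:"min_latency"`
	MaxLatency     float64 `json:"max_latency"`
	MeanLatency    float64 `json:"mean_latency"`
	MedianLatency  float64 `json:"median_latency"`
	Throughput     float64 `json:"throughput"`
	GPUMemUsed     float64 `json:"gpu_mem_used"`
	GPUUtilization float64 `json:"gpu_utilization"`
}

func main() {
	// The sample result line from the benchmark job above.
	raw := `{"p90_latency": 3.806, "p95_latency": 3.924, "p99_latency": 4.781, "min_latency": 3.665, "max_latency": 1555.418, "mean_latency": 3.88, "median_latency": 3.731, "throughput": 257, "gpu_mem_used": 1.47, "gpu_utilization": 38.39514839785918}`

	var r benchmarkResult
	if err := json.Unmarshal([]byte(raw), &r); err != nil {
		panic(err)
	}
	fmt.Printf("p99 latency %.3f ms, throughput %.0f, GPU utilization %.1f%%\n",
		r.P99Latency, r.Throughput, r.GPUUtilization)
}
```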




5 changes: 1 addition & 4 deletions docs/model/index.md → docs/analyze/index.md
@@ -1,7 +1,6 @@
# Model Analyze Guide

Welcome to the Arena Model Guide! This guide covers how to use the ``arena cli`` to profile the model to find performance bottleneck, and how to use tensorrt to optimize the inference performance, you can also benchmark the model to get inference metrics like qps, latency, gpu usage and so on. This page outlines the most common situations and questions that bring readers to this section.

Welcome to the Arena Analyze Guide! This guide covers how to use the `arena cli` to profile a model to find performance bottlenecks, how to use tensorrt to optimize inference performance, and how to benchmark a model to get inference metrics such as qps, latency, and gpu usage. This page outlines the most common situations and questions that bring readers to this section.

## Who should use this guide?

@@ -15,8 +14,6 @@ After training you may get some models. If you want to know the model performanc

* I want to [optimize the torchscript module with tensorrt](optimize/optimize_torchscript.md).


## Benchmark the model inference

* I want to [benchmark the torchscript inference performance](benchmark/benchmark_torchscript.md).

@@ -65,7 +65,7 @@ Then give a profile configuration file named config.json like below.
3\. Submit a model optimize job.

```shell
$ arena model optimize \
$ arena analyze optimize \
--name=resnet18-optimize \
--namespace=default \
--image=registry.cn-beijing.aliyuncs.com/kube-ai/easy-inference:1.0.0 \
@@ -78,13 +78,13 @@ $ arena model optimize \

job.batch/resnet18-optimize created
INFO[0002] The model optimize job resnet18-optimize has been submitted successfully
INFO[0002] You can run `arena model get resnet18-optimize` to check the job status
INFO[0002] You can run `arena analyze get resnet18-optimize` to check the job status
```

4\. List all the model optimize jobs.

```shell
$ arena model list
$ arena analyze list

NAMESPACE NAME STATUS TYPE DURATION AGE GPU(Requested)
default-group resnet18-optimize RUNNING Optimize 0s 1m 1
@@ -93,7 +93,7 @@ default-group resnet18-optimize RUNNING Optimize 0s 1m 1
5\. Get model optimize job detail info.

```shell
$ arena model get resnet18-profile
$ arena analyze get resnet18-profile
Name: resnet18-optimize
Namespace: default-group
Type: Optimize
@@ -111,4 +111,4 @@ Instances:
resnet18-optimize-xrd6w ContainerCreating 1m 0/1 0 cn-shenzhen.192.168.1.209
```

6\. After the optimize job has finished, you can see a new torchscript module named opt_resnet18.pt in --export-path.
@@ -65,7 +65,7 @@ Then give a profile configuration file named config.json like below.
3\. Submit a model profile job.

```shell
$ arena model profile \
$ arena analyze profile \
--name=resnet18-profile \
--namespace=default \
--image=registry.cn-beijing.aliyuncs.com/kube-ai/easy-inference:1.0.0 \
@@ -82,13 +82,13 @@ service/resnet18-profile-tensorboard created
deployment.apps/resnet18-profile-tensorboard created
job.batch/resnet18-profile created
INFO[0001] The model profile job resnet18-profile has been submitted successfully
INFO[0001] You can run `arena model get resnet18-profile` to check the job status
INFO[0001] You can run `arena analyze get resnet18-profile` to check the job status
```

4\. List all the profile jobs.

```shell
$ arena model list
$ arena analyze list

NAMESPACE NAME STATUS TYPE DURATION AGE GPU(Requested)
default resnet18-profile RUNNING Profile 34s 34s 1
@@ -97,7 +97,7 @@ default resnet18-profile RUNNING Profile 34s 34s 1
5\. Get model profile job detail info.

```shell
$ arena model get resnet18-profile
$ arena analyze get resnet18-profile
Name: resnet18-profile
Namespace: default
Type: Profile
@@ -126,6 +126,4 @@ Forwarding from 127.0.0.1:6006 -> 6006
Forwarding from [::1]:6006 -> 6006
```



![tensorboard](./1-torchscript-profile-result.jpg)
2 changes: 1 addition & 1 deletion pkg/analyze/submit_benchmark.go
@@ -24,6 +24,6 @@ func SubmitModelBenchmarkJob(namespace string, args *types.ModelBenchmarkArgs) e
return err
}
log.Infof("The model benchmark job %s has been submitted successfully", args.Name)
log.Infof("You can run `arena model get %s` to check the job status", args.Name)
log.Infof("You can run `arena analyze get %s` to check the job status", args.Name)
return nil
}
2 changes: 1 addition & 1 deletion pkg/analyze/submit_evaluate.go
@@ -23,6 +23,6 @@ func SubmitModelEvaluateJob(namespace string, args *types.ModelEvaluateArgs) err
return err
}
log.Infof("The model evaluate job %s has been submitted successfully", args.Name)
log.Infof("You can run `arena model get %s` to check the job status", args.Name)
log.Infof("You can run `arena analyze get %s` to check the job status", args.Name)
return nil
}
2 changes: 1 addition & 1 deletion pkg/analyze/submit_optimize.go
@@ -23,6 +23,6 @@ func SubmitModelOptimizeJob(namespace string, args *types.ModelOptimizeArgs) err
return err
}
log.Infof("The model optimize job %s has been submitted successfully", args.Name)
log.Infof("You can run `arena model get %s` to check the job status", args.Name)
log.Infof("You can run `arena analyze get %s` to check the job status", args.Name)
return nil
}
2 changes: 1 addition & 1 deletion pkg/analyze/submit_profile.go
@@ -23,6 +23,6 @@ func SubmitModelProfileJob(namespace string, args *types.ModelProfileArgs) error
return err
}
log.Infof("The model profile job %s has been submitted successfully", args.Name)
log.Infof("You can run `arena model get %s` to check the job status", args.Name)
log.Infof("You can run `arena analyze get %s` to check the job status", args.Name)
return nil
}
