Merge branch 'main' of https://github.com/arc53/llm-price-compass

arc53 · Aug 14, 2024 · 73b9f1c · 73b9f1c
2 parents ff5932b + 0a98272
commit 73b9f1c
Show file tree

Hide file tree

Showing 2 changed files with 5 additions and 5 deletions.
diff --git a/gpu-benchmarks.csv b/gpu-benchmarks.csv
@@ -1,3 +1,3 @@
-"backend","benchmark_duration","best_of","completed","cost_per_hour","date","duration","gpu","input_throughput","input_token_throughput","mean_e2e_latency","mean_itl","mean_tpot","mean_ttft","median_e2e_latency","median_itl","median_tpot","median_ttft","model_id","num_prompts","output_throughput","output_token_throughput","p99_itl","p99_tpot","p99_ttft","provider","request_rate","request_throughput","successful_requests","tokenizer_id","total_generated_tokens","total_generated_tokens_ret","total_input_tokens","total_output_tokens","traffic_request_rate","use_beam_search"
-"sglang",35.45,,,3.02,"20240814-155812",,"NVIDIA H100",,6070.48,14328.17,71.79,145.31,773.1,13759.53,49.35,81.55,193.72,,,,5595.07,291.88,915.34,4118.65,"scaleway",,28.21,1000,,198343,197967,215196,,200,
-"vllm",,1,1000,0.84,"20240813-143958",385.30312793600024,"NVIDIA L4",558.5109084184351,,,,,,,,,,"meta-llama/Meta-Llama-3.1-8B-Instruct",1000,513.2037236740132,,,,,"scaleway","inf",2.5953591536015312,,"meta-llama/Meta-Llama-3.1-8B-Instruct",,,215196,197739,,false
+"backend","benchmark_duration","best_of","completed","cost_per_hour","date","duration","gpu","input_throughput","mean_e2e_latency","mean_itl","mean_tpot","mean_ttft","median_e2e_latency","median_itl","median_tpot","median_ttft","model_id","num_prompts","output_throughput","p99_itl","p99_tpot","p99_ttft","provider","request_rate","request_throughput","successful_requests","tokenizer_id","total_generated_tokens","total_generated_tokens_ret","total_input_tokens","total_output_tokens","traffic_request_rate","use_beam_search"
+"sglang",35.45,,,3.02,"20240814-155812",,"NVIDIA H100",6070.48,14328.17,71.79,145.31,773.1,13759.53,49.35,81.55,193.72,,,5595.07,291.88,915.34,4118.65,"scaleway",,28.21,1000,,198343,197967,215196,,200,
+"vllm",,1,1000,0.84,"20240813-143958",385.30312793600024,"NVIDIA L4",558.5109084184351,,,,,,,,,"meta-llama/Meta-Llama-3.1-8B-Instruct",1000,513.2037236740132,,,,"scaleway","inf",2.5953591536015312,,"meta-llama/Meta-Llama-3.1-8B-Instruct",,,215196,197739,,false
diff --git a/gpu-benchmarks.json b/gpu-benchmarks.json
@@ -12,8 +12,8 @@
     "total_generated_tokens": 198343,
     "total_generated_tokens_ret": 197967,
     "request_throughput": 28.21,
-    "input_token_throughput": 6070.48,
-    "output_token_throughput": 5595.07,
+    "input_throughput": 6070.48,
+    "output_throughput": 5595.07,
     "mean_e2e_latency": 14328.17,
     "median_e2e_latency": 13759.53,
     "mean_ttft": 773.1,