Skip to content

Commit

Permalink
Fix inaccurate test duration calculation (#183)
Browse files: browse the repository at this point in the history
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
  • Loading branch information
lvliang-intel authored Nov 6, 2024
1 parent a19f42e commit 9d76832
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
3 changes: 3 additions & 0 deletions evals/benchmark/stresscli/locust/aistress.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def bench_main(self):
"faqgenfixed",
"faqgenbench",
]
test_start_time = time.time()
try:
start_ts = time.perf_counter()
with self.client.post(
Expand Down Expand Up @@ -150,6 +151,7 @@ def bench_main(self):
"response_string": resp.text,
"first_token_latency": time.perf_counter() - start_ts,
"total_latency": time.perf_counter() - start_ts,
"test_start_time": test_start_time,
}
else:
first_token_ts = None
Expand Down Expand Up @@ -184,6 +186,7 @@ def bench_main(self):
"response_string": complete_response,
"first_token_latency": first_token_ts - start_ts,
"total_latency": end_ts - start_ts,
"test_start_time": test_start_time,
}
reqdata = bench_package.respStatics(self.environment, reqData, respData)
logging.debug(f"Request data collected {reqdata}")
Expand Down
12 changes: 6 additions & 6 deletions evals/benchmark/stresscli/locust/tokenresponse.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ def respStatics(environment, req, resp):
num_token_input_prompt = -1

num_token_output = len(
tokenizer.encode(
resp["response_string"].lstrip().encode("utf-8").decode("unicode_escape"), add_special_tokens=False
)
tokenizer.encode(resp["response_string"].encode("utf-8").decode("unicode_escape"), add_special_tokens=False)
)

return {
Expand All @@ -37,6 +35,7 @@ def respStatics(environment, req, resp):
"first_token": resp["first_token_latency"] * 1000,
"next_token": (resp["total_latency"] - resp["first_token_latency"]) / (num_token_output - 1) * 1000,
"total_latency": resp["total_latency"] * 1000,
"test_start_time": resp["test_start_time"],
}


Expand All @@ -47,7 +46,6 @@ def staticsOutput(environment, reqlist):
e2e_lat = []
tokens_input = 0
tokens_output = 0
duration = environment.runner.stats.last_request_timestamp - environment.runner.stats.start_time

if len(reqlist) == 0:
logging.debug(f"len(reqlist): {len(reqlist)}, skip printing")
Expand All @@ -60,6 +58,8 @@ def staticsOutput(environment, reqlist):
e2e_lat.append(req["total_latency"])
tokens_output += req["tokens_output"]
tokens_input += req["tokens_input"]
test_start_time = req["test_start_time"]
duration = environment.runner.stats.last_request_timestamp - test_start_time

# Statistics for success response data only
if tokens_output == 0:
Expand All @@ -70,8 +70,8 @@ def staticsOutput(environment, reqlist):
" Output Tokens: {}, RPS: {:.2f}, Input Tokens per Second: {:.2f}, Output Tokens per Second: {:.2f}"
)
e2e_msg = "End to End latency(ms), P50: {:.2f}, P90: {:.2f}, P99: {:.2f}, Avg: {:.2f}"
first_msg = "First token latency(ms), P50: {:.2f}, P90: {:.2f}, P99: {:.2f}, Avg: {:.2f}"
next_msg = "Next token latency(ms), P50: {:.2f}, P90: {:.2f}, P99: {:.2f}, Avg: {:.2f}"
first_msg = "Time to First Token-TTFT(ms), P50: {:.2f}, P90: {:.2f}, P99: {:.2f}, Avg: {:.2f}"
next_msg = "Time Per Output Token-TPOT(ms), P50: {:.2f}, P90: {:.2f}, P99: {:.2f}, Avg: {:.2f}"
average_msg = "Average token latency(ms) : {:.2f}"
console_logger.warning("\n=================Total statistics=====================")
if tokens_output == 0:
Expand Down

0 comments on commit 9d76832

Please sign in to comment.