From af967ee8f49aa328dc10b85f0627fcf6220274b1 Mon Sep 17 00:00:00 2001
From: zhaochenyang20
Date: Thu, 28 Nov 2024 02:37:42 +0000
Subject: [PATCH] revert test metric

---
 test/srt/test_metrics.py | 153 ++++++++++++++++-----------------------
 1 file changed, 62 insertions(+), 91 deletions(-)

diff --git a/test/srt/test_metrics.py b/test/srt/test_metrics.py
index 94479ab02e..64e35a5dac 100644
--- a/test/srt/test_metrics.py
+++ b/test/srt/test_metrics.py
@@ -1,4 +1,3 @@
-import json
 import unittest
 
 import requests
@@ -12,100 +11,72 @@
 )
 
 
-class TestUpdateWeights(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
-        cls.base_url = DEFAULT_URL_FOR_TEST
-        cls.process = popen_launch_server(
-            cls.model,
-            cls.base_url,
+class TestEnableMetrics(unittest.TestCase):
+    def test_metrics_enabled(self):
+        """Test that metrics endpoint returns data when enabled"""
+        process = popen_launch_server(
+            DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
+            DEFAULT_URL_FOR_TEST,
             timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            other_args=("--mem-",),
+            other_args=["--enable-metrics"],
         )
 
-    @classmethod
-    def tearDownClass(cls):
-        kill_child_process(cls.process.pid, include_self=True)
-
-    def run_decode(self):
-        response = requests.post(
-            self.base_url + "/generate",
-            json={
-                "text": "The capital of France is",
-                "sampling_params": {
-                    "temperature": 0,
-                    "max_new_tokens": 32,
+        try:
+            # Make some requests to generate some metrics
+            response = requests.get(f"{DEFAULT_URL_FOR_TEST}/health_generate")
+            self.assertEqual(response.status_code, 200)
+
+            response = requests.post(
+                f"{DEFAULT_URL_FOR_TEST}/generate",
+                json={
+                    "text": "The capital of France is",
+                    "sampling_params": {
+                        "temperature": 0,
+                        "max_new_tokens": 32,
+                    },
+                    "stream": True,
                 },
-            },
-        )
-        print(json.dumps(response.json()))
-        print("=" * 100)
-        text = response.json()["text"]
-        return text
-
-    def get_model_info(self):
-        response = requests.get(self.base_url + "/get_model_info")
-        model_path = response.json()["model_path"]
-        print(json.dumps(response.json()))
-        return model_path
-
-    def run_update_weights(self, model_path):
-        response = requests.post(
-            self.base_url + "/update_weights_from_disk",
-            json={
-                "model_path": model_path,
-            },
-        )
-        ret = response.json()
-        print(json.dumps(response.json()))
-        return ret
-
-    def test_update_weights(self):
-        origin_model_path = self.get_model_info()
-        print(f"origin_model_path: {origin_model_path}")
-        origin_response = self.run_decode()
-
-        # update weights
-        new_model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST.replace("-Instruct", "")
-        ret = self.run_update_weights(new_model_path)
-        assert ret["success"]
-
-        updated_model_path = self.get_model_info()
-        print(f"updated_model_path: {updated_model_path}")
-        assert updated_model_path == new_model_path
-        assert updated_model_path != origin_model_path
-
-        updated_response = self.run_decode()
-        assert origin_response[:32] != updated_response[:32]
-
-        # update weights back
-        ret = self.run_update_weights(origin_model_path)
-        assert ret["success"]
-
-        updated_model_path = self.get_model_info()
-        assert updated_model_path == origin_model_path
-
-        updated_response = self.run_decode()
-        assert origin_response[:32] == updated_response[:32]
-
-    def test_update_weights_unexist_model(self):
-        origin_model_path = self.get_model_info()
-        print(f"origin_model_path: {origin_model_path}")
-        origin_response = self.run_decode()
-
-        # update weights
-        new_model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST.replace("-Instruct", "wrong")
-        ret = self.run_update_weights(new_model_path)
-        assert not ret["success"]
-
-        updated_model_path = self.get_model_info()
-        print(f"updated_model_path: {updated_model_path}")
-        assert updated_model_path == origin_model_path
-
-        updated_response = self.run_decode()
-        assert origin_response[:32] == updated_response[:32]
+            stream=True,
+        )
+        for _ in response.iter_lines(decode_unicode=False):
+            pass
+
+            # Get metrics
+            metrics_response = requests.get(f"{DEFAULT_URL_FOR_TEST}/metrics")
+            self.assertEqual(metrics_response.status_code, 200)
+            metrics_content = metrics_response.text
+
+            print(f"metrics_content=\n{metrics_content}")
+
+            # Verify essential metrics are present
+            essential_metrics = [
+                "sglang:num_running_reqs",
+                "sglang:token_usage",
+                "sglang:gen_throughput",
+                "sglang:cache_hit_rate",
+                "sglang:func_latency_seconds",
+                "sglang:prompt_tokens_total",
+                "sglang:generation_tokens_total",
+                "sglang:time_to_first_token_seconds",
+                "sglang:time_per_output_token_seconds",
+                "sglang:e2e_request_latency_seconds",
+            ]
+
+            for metric in essential_metrics:
+                self.assertIn(metric, metrics_content, f"Missing metric: {metric}")
+
+            # Verify model name label is present and correct
+            expected_model_name = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
+            self.assertIn(f'model_name="{expected_model_name}"', metrics_content)
+
+            # Verify metrics have values (not empty)
+            self.assertIn("_sum{", metrics_content)
+            self.assertIn("_count{", metrics_content)
+            self.assertIn("_bucket{", metrics_content)
+
+        finally:
+            kill_child_process(process.pid, include_self=True)
 
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()
\ No newline at end of file
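
For reference, the behavior this revert restores can also be exercised by hand against a live server. The snippet below is a minimal sketch and not part of the patch: it assumes an sglang server was already launched separately with --enable-metrics, and BASE_URL is a placeholder standing in for the test's DEFAULT_URL_FOR_TEST.

import requests

# Placeholder address; the test itself uses DEFAULT_URL_FOR_TEST.
BASE_URL = "http://127.0.0.1:30000"

# Drive some traffic so the counters and histograms have samples.
requests.post(
    f"{BASE_URL}/generate",
    json={
        "text": "The capital of France is",
        "sampling_params": {"temperature": 0, "max_new_tokens": 32},
    },
)

# Scrape the Prometheus text exposition and check for one of the
# metrics the test asserts on.
metrics = requests.get(f"{BASE_URL}/metrics").text
assert "sglang:prompt_tokens_total" in metrics
for line in metrics.splitlines():
    if line.startswith("sglang:prompt_tokens_total"):
        print(line)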