From af967ee8f49aa328dc10b85f0627fcf6220274b1 Mon Sep 17 00:00:00 2001
From: zhaochenyang20
Date: Thu, 28 Nov 2024 02:37:42 +0000
Subject: [PATCH] revert test metric

---
 test/srt/test_metrics.py | 153 ++++++++++++++++-----------------------
 1 file changed, 62 insertions(+), 91 deletions(-)

diff --git a/test/srt/test_metrics.py b/test/srt/test_metrics.py
index 94479ab02e..64e35a5dac 100644
--- a/test/srt/test_metrics.py
+++ b/test/srt/test_metrics.py
@@ -1,4 +1,3 @@
-import json
 import unittest
 
 import requests
@@ -12,100 +11,72 @@
 )
 
 
-class TestUpdateWeights(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
-        cls.base_url = DEFAULT_URL_FOR_TEST
-        cls.process = popen_launch_server(
-            cls.model,
-            cls.base_url,
+class TestEnableMetrics(unittest.TestCase):
+    def test_metrics_enabled(self):
+        """Test that metrics endpoint returns data when enabled"""
+        process = popen_launch_server(
+            DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
+            DEFAULT_URL_FOR_TEST,
             timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
-            other_args=("--mem-",),
+            other_args=["--enable-metrics"],
         )
 
-    @classmethod
-    def tearDownClass(cls):
-        kill_child_process(cls.process.pid, include_self=True)
-
-    def run_decode(self):
-        response = requests.post(
-            self.base_url + "/generate",
-            json={
-                "text": "The capital of France is",
-                "sampling_params": {
-                    "temperature": 0,
-                    "max_new_tokens": 32,
+        try:
+            # Make some requests to generate some metrics
+            response = requests.get(f"{DEFAULT_URL_FOR_TEST}/health_generate")
+            self.assertEqual(response.status_code, 200)
+
+            response = requests.post(
+                f"{DEFAULT_URL_FOR_TEST}/generate",
+                json={
+                    "text": "The capital of France is",
+                    "sampling_params": {
+                        "temperature": 0,
+                        "max_new_tokens": 32,
+                    },
+                    "stream": True,
                 },
-            },
-        )
-        print(json.dumps(response.json()))
-        print("=" * 100)
-        text = response.json()["text"]
-        return text
-
-    def get_model_info(self):
-        response = requests.get(self.base_url + "/get_model_info")
-        model_path = response.json()["model_path"]
-        print(json.dumps(response.json()))
-        return model_path
-
-    def run_update_weights(self, model_path):
-        response = requests.post(
-            self.base_url + "/update_weights_from_disk",
-            json={
-                "model_path": model_path,
-            },
-        )
-        ret = response.json()
-        print(json.dumps(response.json()))
-        return ret
-
-    def test_update_weights(self):
-        origin_model_path = self.get_model_info()
-        print(f"origin_model_path: {origin_model_path}")
-        origin_response = self.run_decode()
-
-        # update weights
-        new_model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST.replace("-Instruct", "")
-        ret = self.run_update_weights(new_model_path)
-        assert ret["success"]
-
-        updated_model_path = self.get_model_info()
-        print(f"updated_model_path: {updated_model_path}")
-        assert updated_model_path == new_model_path
-        assert updated_model_path != origin_model_path
-
-        updated_response = self.run_decode()
-        assert origin_response[:32] != updated_response[:32]
-
-        # update weights back
-        ret = self.run_update_weights(origin_model_path)
-        assert ret["success"]
-
-        updated_model_path = self.get_model_info()
-        assert updated_model_path == origin_model_path
-
-        updated_response = self.run_decode()
-        assert origin_response[:32] == updated_response[:32]
-
-    def test_update_weights_unexist_model(self):
-        origin_model_path = self.get_model_info()
-        print(f"origin_model_path: {origin_model_path}")
-        origin_response = self.run_decode()
-
-        # update weights
-        new_model_path = DEFAULT_SMALL_MODEL_NAME_FOR_TEST.replace("-Instruct", "wrong")
-        ret = self.run_update_weights(new_model_path)
-        assert not ret["success"]
-
-        updated_model_path = self.get_model_info()
-        print(f"updated_model_path: {updated_model_path}")
-        assert updated_model_path == origin_model_path
-
-        updated_response = self.run_decode()
-        assert origin_response[:32] == updated_response[:32]
+            stream=True,
+        )
+        for _ in response.iter_lines(decode_unicode=False):
+            pass
+
+            # Get metrics
+            metrics_response = requests.get(f"{DEFAULT_URL_FOR_TEST}/metrics")
+            self.assertEqual(metrics_response.status_code, 200)
+            metrics_content = metrics_response.text
+
+            print(f"metrics_content=\n{metrics_content}")
+
+            # Verify essential metrics are present
+            essential_metrics = [
+                "sglang:num_running_reqs",
+                "sglang:token_usage",
+                "sglang:gen_throughput",
+                "sglang:cache_hit_rate",
+                "sglang:func_latency_seconds",
+                "sglang:prompt_tokens_total",
+                "sglang:generation_tokens_total",
+                "sglang:time_to_first_token_seconds",
+                "sglang:time_per_output_token_seconds",
+                "sglang:e2e_request_latency_seconds",
+            ]
+
+            for metric in essential_metrics:
+                self.assertIn(metric, metrics_content, f"Missing metric: {metric}")
+
+            # Verify model name label is present and correct
+            expected_model_name = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
+            self.assertIn(f'model_name="{expected_model_name}"', metrics_content)
+
+            # Verify metrics have values (not empty)
+            self.assertIn("_sum{", metrics_content)
+            self.assertIn("_count{", metrics_content)
+            self.assertIn("_bucket{", metrics_content)
+
+        finally:
+            kill_child_process(process.pid, include_self=True)
 
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()
\ No newline at end of file
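
For reference, the behavior this revert restores can also be exercised by hand against a live server. The snippet below is a minimal sketch and not part of the patch: it assumes an sglang server was already launched separately with --enable-metrics, and BASE_URL is a placeholder standing in for the test's DEFAULT_URL_FOR_TEST.

import requests

# Placeholder address; the test itself uses DEFAULT_URL_FOR_TEST.
BASE_URL = "http://127.0.0.1:30000"

# Drive some traffic so the counters and histograms have samples.
requests.post(
    f"{BASE_URL}/generate",
    json={
        "text": "The capital of France is",
        "sampling_params": {"temperature": 0, "max_new_tokens": 32},
    },
)

# Scrape the Prometheus text exposition and check for one of the
# metrics the test asserts on.
metrics = requests.get(f"{BASE_URL}/metrics").text
assert "sglang:prompt_tokens_total" in metrics
for line in metrics.splitlines():
    if line.startswith("sglang:prompt_tokens_total"):
        print(line)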