diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index 144ade58ea..8b326cb612 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -169,9 +169,11 @@ def __post_init__(self):
             gpu_mem = get_amdgpu_memory_capacity()
         else:
             gpu_mem = get_nvgpu_memory_capacity()
+
+        # If the GPU has less than 25GB of memory (e.g., RTX 4090) and the chunked prefill size is still at its default value of 8192 (i.e., the user has not overridden it), reduce it by a factor of 4.
         if gpu_mem < 25000:
-            self.chunked_prefill_size //= 4  # make it 2048
-            self.cuda_graph_max_bs = 4
+            if self.chunked_prefill_size == 8192:
+               self.chunked_prefill_size //= 4  # make it 2048
             logger.info("Automatically adjust --chunked-prefill-size for small GPUs.")
 
         # Choose kernel backends