diff --git a/python/sglang/api.py b/python/sglang/api.py
index 9470b1425d..fc6dce16e3 100644
--- a/python/sglang/api.py
+++ b/python/sglang/api.py
@@ -3,22 +3,10 @@
 import re
 from typing import Callable, List, Optional, Union
 
-from sglang.backend.anthropic import Anthropic
 from sglang.backend.base_backend import BaseBackend
-from sglang.backend.openai import OpenAI
-from sglang.backend.runtime_endpoint import RuntimeEndpoint
-from sglang.backend.vertexai import VertexAI
 from sglang.global_config import global_config
-from sglang.lang.ir import (
-    SglExpr,
-    SglExprList,
-    SglFunction,
-    SglGen,
-    SglImage,
-    SglRoleBegin,
-    SglRoleEnd,
-    SglSelect,
-)
+from sglang.lang.ir import (SglExpr, SglExprList, SglFunction, SglGen,
+                            SglImage, SglRoleBegin, SglRoleEnd, SglSelect)
 
 
 def function(
diff --git a/python/sglang/backend/anthropic.py b/python/sglang/backend/anthropic.py
index aa03cb5b6c..05cf4e77e7 100644
--- a/python/sglang/backend/anthropic.py
+++ b/python/sglang/backend/anthropic.py
@@ -1,6 +1,4 @@
-from typing import List, Optional, Union
 
-import numpy as np
 from sglang.backend.base_backend import BaseBackend
 from sglang.lang.chat_template import get_chat_template
 from sglang.lang.interpreter import StreamExecutor
diff --git a/python/sglang/backend/base_backend.py b/python/sglang/backend/base_backend.py
index cb504f51b7..606b821a87 100644
--- a/python/sglang/backend/base_backend.py
+++ b/python/sglang/backend/base_backend.py
@@ -1,4 +1,4 @@
-from typing import Callable, List, Optional, Union
+from typing import List, Optional, Union
 
 from sglang.lang.chat_template import get_chat_template
 from sglang.lang.interpreter import StreamExecutor
diff --git a/python/sglang/backend/openai.py b/python/sglang/backend/openai.py
index f2dd2f0678..540baae5dd 100644
--- a/python/sglang/backend/openai.py
+++ b/python/sglang/backend/openai.py
@@ -1,17 +1,17 @@
 import logging
 import time
-from typing import Callable, List, Optional, Union
+from typing import List, Optional
 
 import numpy as np
 from sglang.backend.base_backend import BaseBackend
-from sglang.lang.chat_template import ChatTemplate, get_chat_template_by_model_path
+from sglang.lang.chat_template import (ChatTemplate,
+                                       get_chat_template_by_model_path)
 from sglang.lang.interpreter import StreamExecutor
 from sglang.lang.ir import SglSamplingParams
 
 try:
-    import tiktoken
-    import openai
+    import tiktoken
 except ImportError as e:
     openai = tiktoken = e
 
 
diff --git a/python/sglang/backend/runtime_endpoint.py b/python/sglang/backend/runtime_endpoint.py
index 3d2ecaa762..bd2c053ffa 100644
--- a/python/sglang/backend/runtime_endpoint.py
+++ b/python/sglang/backend/runtime_endpoint.py
@@ -1,14 +1,13 @@
 import json
-from typing import Callable, List, Optional, Union
+from typing import List, Optional
 
 import numpy as np
-import requests
 from sglang.backend.base_backend import BaseBackend
 from sglang.global_config import global_config
 from sglang.lang.chat_template import get_chat_template_by_model_path
 from sglang.lang.interpreter import StreamExecutor
-from sglang.lang.ir import SglArgument, SglSamplingParams
-from sglang.utils import encode_image_base64, find_printable_text, http_request
+from sglang.lang.ir import SglSamplingParams
+from sglang.utils import find_printable_text, http_request
 
 
 class RuntimeEndpoint(BaseBackend):
diff --git a/python/sglang/backend/vertexai.py b/python/sglang/backend/vertexai.py
index 5c3c307e2e..4c130ff78d 100644
--- a/python/sglang/backend/vertexai.py
+++ b/python/sglang/backend/vertexai.py
@@ -1,8 +1,6 @@
 import os
 import warnings
-from typing import List, Optional, Union
 
-import numpy as np
 from sglang.backend.base_backend import BaseBackend
 from sglang.lang.chat_template import get_chat_template
 from sglang.lang.interpreter import StreamExecutor
@@ -10,11 +8,8 @@
 
 try:
     import vertexai
-    from vertexai.preview.generative_models import (
-        GenerationConfig,
-        GenerativeModel,
-        Image,
-    )
+    from vertexai.preview.generative_models import (GenerationConfig,
+                                                    GenerativeModel, Image)
 except ImportError as e:
     GenerativeModel = e
 
diff --git a/python/sglang/lang/chat_template.py b/python/sglang/lang/chat_template.py
index 43303bf4c7..aa4bc2f2a3 100644
--- a/python/sglang/lang/chat_template.py
+++ b/python/sglang/lang/chat_template.py
@@ -1,6 +1,6 @@
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from enum import Enum, auto
-from typing import Callable, Dict, List, Optional, Tuple
+from typing import Callable, Dict, List, Tuple
 
 
 class ChatTemplateStyle(Enum):
diff --git a/python/sglang/lang/compiler.py b/python/sglang/lang/compiler.py
index 2c071e407e..b2a83ea3c2 100644
--- a/python/sglang/lang/compiler.py
+++ b/python/sglang/lang/compiler.py
@@ -5,13 +5,7 @@
 
 from sglang.global_config import global_config
 from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
-from sglang.lang.ir import (
-    SglArgument,
-    SglConstantText,
-    SglExpr,
-    SglSamplingParams,
-    SglVariable,
-)
+from sglang.lang.ir import SglArgument, SglExpr, SglSamplingParams, SglVariable
 
 
 def compile_func(function, backend):
diff --git a/python/sglang/lang/interpreter.py b/python/sglang/lang/interpreter.py
index 08a8d401bc..83c4d915a5 100644
--- a/python/sglang/lang/interpreter.py
+++ b/python/sglang/lang/interpreter.py
@@ -7,26 +7,14 @@
 import uuid
 from concurrent.futures import ThreadPoolExecutor
 from contextlib import contextmanager
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional
 
 import tqdm
 from sglang.global_config import global_config
-from sglang.lang.ir import (
-    SglCommitLazy,
-    SglConcateAndAppend,
-    SglConstantText,
-    SglExpr,
-    SglExprList,
-    SglFunction,
-    SglGen,
-    SglImage,
-    SglRoleBegin,
-    SglRoleEnd,
-    SglSelect,
-    SglVariable,
-    SglVarScopeBegin,
-    SglVarScopeEnd,
-)
+from sglang.lang.ir import (SglCommitLazy, SglConcateAndAppend,
+                            SglConstantText, SglExpr, SglExprList, SglGen,
+                            SglImage, SglRoleBegin, SglRoleEnd, SglSelect,
+                            SglVariable, SglVarScopeBegin, SglVarScopeEnd)
 from sglang.utils import encode_image_base64
 
 
diff --git a/python/sglang/lang/ir.py b/python/sglang/lang/ir.py
index 9895786dc6..66f515686e 100644
--- a/python/sglang/lang/ir.py
+++ b/python/sglang/lang/ir.py
@@ -472,4 +472,4 @@ def __init__(self):
         super().__init__()
 
     def __repr__(self):
-        return f"CommitLazy()"
+        return "CommitLazy()"
diff --git a/python/sglang/lang/tracer.py b/python/sglang/lang/tracer.py
index 74ac9b9986..fcf618b695 100644
--- a/python/sglang/lang/tracer.py
+++ b/python/sglang/lang/tracer.py
@@ -1,29 +1,14 @@
 """Tracing a program."""
 
 import uuid
-from typing import Any, Callable, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional
 
 from sglang.backend.base_backend import BaseBackend
-from sglang.global_config import global_config
 from sglang.lang.interpreter import ProgramState, ProgramStateGroup
-from sglang.lang.ir import (
-    SglArgument,
-    SglCommitLazy,
-    SglConcateAndAppend,
-    SglConstantText,
-    SglExpr,
-    SglExprList,
-    SglFork,
-    SglFunction,
-    SglGen,
-    SglGetForkItem,
-    SglRoleBegin,
-    SglRoleEnd,
-    SglSelect,
-    SglVariable,
-    SglVarScopeBegin,
-    SglVarScopeEnd,
-)
+from sglang.lang.ir import (SglArgument, SglConstantText, SglExpr, SglExprList,
+                            SglFork, SglGen, SglGetForkItem, SglRoleBegin,
+                            SglRoleEnd, SglSelect, SglVariable,
+                            SglVarScopeBegin, SglVarScopeEnd)
 
 
 class StopTracing(Exception):
diff --git a/python/sglang/srt/hf_transformers_utils.py b/python/sglang/srt/hf_transformers_utils.py
index fde8457a39..48de79dc9d 100644
--- a/python/sglang/srt/hf_transformers_utils.py
+++ b/python/sglang/srt/hf_transformers_utils.py
@@ -3,17 +3,12 @@
 import json
 import os
 import warnings
-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union
 
 from huggingface_hub import snapshot_download
 from sglang.srt.utils import is_multimodal_model
-from transformers import (
-    AutoConfig,
-    AutoProcessor,
-    AutoTokenizer,
-    PreTrainedTokenizer,
-    PreTrainedTokenizerFast,
-)
+from transformers import (AutoConfig, AutoProcessor, AutoTokenizer,
+                          PreTrainedTokenizer, PreTrainedTokenizerFast)
 
 
 def download_from_hf(model_path: str):
diff --git a/python/sglang/srt/layers/extend_attention.py b/python/sglang/srt/layers/extend_attention.py
index 62167a5825..6edc140669 100644
--- a/python/sglang/srt/layers/extend_attention.py
+++ b/python/sglang/srt/layers/extend_attention.py
@@ -1,7 +1,8 @@
 import torch
 import triton
 import triton.language as tl
-from sglang.srt.layers.context_flashattention_nopad import context_attention_fwd
+from sglang.srt.layers.context_flashattention_nopad import \
+    context_attention_fwd
 from sglang.srt.utils import wrap_kernel_launcher
 
 CUDA_CAPABILITY = torch.cuda.get_device_capability()
diff --git a/python/sglang/srt/layers/logits_processor.py b/python/sglang/srt/layers/logits_processor.py
index 980a2cd207..bbca95b347 100644
--- a/python/sglang/srt/layers/logits_processor.py
+++ b/python/sglang/srt/layers/logits_processor.py
@@ -1,10 +1,8 @@
 import torch
-from sglang.srt.managers.router.model_runner import ForwardMode, InputMetadata
+from sglang.srt.managers.router.model_runner import ForwardMode
 from torch import nn
 from vllm.model_executor.parallel_utils.communication_op import (
-    get_tensor_model_parallel_world_size,
-    tensor_model_parallel_all_gather,
-)
+    get_tensor_model_parallel_world_size, tensor_model_parallel_all_gather)
 
 
 class LogitsProcessor(nn.Module):
diff --git a/python/sglang/srt/layers/radix_attention.py b/python/sglang/srt/layers/radix_attention.py
index 5adc31d3ef..180edb225e 100644
--- a/python/sglang/srt/layers/radix_attention.py
+++ b/python/sglang/srt/layers/radix_attention.py
@@ -1,5 +1,6 @@
 import torch
-from sglang.srt.layers.context_flashattention_nopad import context_attention_fwd
+from sglang.srt.layers.context_flashattention_nopad import \
+    context_attention_fwd
 from sglang.srt.layers.extend_attention import extend_attention_fwd
 from sglang.srt.layers.token_attention import token_attention_fwd
 from sglang.srt.managers.router.model_runner import ForwardMode, InputMetadata
@@ -15,7 +16,8 @@ def __init__(self, num_heads, head_dim, scaling, num_kv_heads, layer_id):
         self.head_dim = head_dim
         self.layer_id = layer_id
 
-        from sglang.srt.managers.router.model_runner import global_server_args_dict
+        from sglang.srt.managers.router.model_runner import \
+            global_server_args_dict
 
         if global_server_args_dict.get("enable_flashinfer", False):
             self.prefill_forward = self.prefill_forward_flashinfer
diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py
index 566d40d13a..7b507df241 100644
--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -83,7 +83,7 @@ def start_detokenizer_process(
 ):
     try:
         manager = DetokenizerManager(server_args, port_args)
-    except Exception as e:
+    except Exception:
         pipe_writer.send(get_exception_traceback())
         raise
     pipe_writer.send("init ok")
diff --git a/python/sglang/srt/managers/router/model_rpc.py b/python/sglang/srt/managers/router/model_rpc.py
index 5c9be20959..0db091d1a2 100644
--- a/python/sglang/srt/managers/router/model_rpc.py
+++ b/python/sglang/srt/managers/router/model_rpc.py
@@ -13,23 +13,17 @@
 from sglang.srt.constrained.fsm_cache import FSMCache
 from sglang.srt.constrained.jump_forward import JumpForwardCache
 from sglang.srt.hf_transformers_utils import get_processor, get_tokenizer
-from sglang.srt.managers.io_struct import (
-    BatchTokenIDOut,
-    FlushCacheReq,
-    TokenizedGenerateReqInput,
-)
+from sglang.srt.managers.io_struct import (BatchTokenIDOut, FlushCacheReq,
+                                           TokenizedGenerateReqInput)
 from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req
 from sglang.srt.managers.router.model_runner import ModelRunner
 from sglang.srt.managers.router.radix_cache import RadixCache
 from sglang.srt.managers.router.scheduler import Scheduler
 from sglang.srt.model_config import ModelConfig
 from sglang.srt.server_args import PortArgs, ServerArgs
-from sglang.srt.utils import (
-    get_exception_traceback,
-    get_int_token_logit_bias,
-    is_multimodal_model,
-    set_random_seed,
-)
+from sglang.srt.utils import (get_exception_traceback,
+                              get_int_token_logit_bias, is_multimodal_model,
+                              set_random_seed)
 from vllm.logger import _default_handler as vllm_default_handler
 
 logger = logging.getLogger("model_rpc")
diff --git a/python/sglang/srt/managers/router/model_runner.py b/python/sglang/srt/managers/router/model_runner.py
index f349819f30..f564ed4859 100644
--- a/python/sglang/srt/managers/router/model_runner.py
+++ b/python/sglang/srt/managers/router/model_runner.py
@@ -16,7 +16,8 @@
 from vllm.model_executor.layers.quantization.gptq import GPTQConfig
 from vllm.model_executor.layers.quantization.marlin import MarlinConfig
 from vllm.model_executor.model_loader import _set_default_torch_dtype
-from vllm.model_executor.parallel_utils.parallel_state import initialize_model_parallel
+from vllm.model_executor.parallel_utils.parallel_state import \
+    initialize_model_parallel
 
 QUANTIONCONFIG_MAPPING = {"awq": AWQConfig, "gptq": GPTQConfig, "marlin": MarlinConfig}
 
@@ -92,10 +93,8 @@ class InputMetadata:
     decode_wrapper = None
 
     def init_flashinfer_args(self, tp_size):
-        from flashinfer import (
-            BatchDecodeWithPagedKVCacheWrapper,
-            BatchPrefillWithPagedKVCacheWrapper,
-        )
+        from flashinfer import (BatchDecodeWithPagedKVCacheWrapper,
+                                BatchPrefillWithPagedKVCacheWrapper)
 
         self.kv_indptr = torch.zeros(
             (self.batch_size + 1,), dtype=torch.int32, device="cuda"
diff --git a/python/sglang/srt/managers/router/radix_cache.py b/python/sglang/srt/managers/router/radix_cache.py
index 6ee6703091..b6d70583e0 100644
--- a/python/sglang/srt/managers/router/radix_cache.py
+++ b/python/sglang/srt/managers/router/radix_cache.py
@@ -1,8 +1,6 @@
 import heapq
 import time
 from collections import defaultdict
-from dataclasses import dataclass
-from typing import Tuple
 
 import torch
 
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index 7947ca2ff5..8c82dd5aaa 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -10,23 +10,16 @@
 import uvloop
 import zmq
 import zmq.asyncio
-from sglang.srt.hf_transformers_utils import (
-    get_config,
-    get_context_length,
-    get_processor,
-    get_tokenizer,
-)
-from sglang.srt.managers.io_struct import (
-    BatchStrOut,
-    DetokenizeReqInput,
-    FlushCacheReq,
-    GenerateReqInput,
-    TokenizedGenerateReqInput,
-)
+from sglang.srt.hf_transformers_utils import (get_config, get_context_length,
+                                              get_processor, get_tokenizer)
+from sglang.srt.managers.io_struct import (BatchStrOut, DetokenizeReqInput,
+                                           FlushCacheReq, GenerateReqInput,
+                                           TokenizedGenerateReqInput)
 from sglang.srt.mm_utils import expand2square, process_anyres_image
 from sglang.srt.sampling_params import SamplingParams
 from sglang.srt.server_args import PortArgs, ServerArgs
-from sglang.srt.utils import get_exception_traceback, is_multimodal_model, load_image
+from sglang.srt.utils import (get_exception_traceback, is_multimodal_model,
+                              load_image)
 
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
 
diff --git a/python/sglang/srt/models/gemma.py b/python/sglang/srt/models/gemma.py
index 4030c5cd7f..2e7ada43e7 100644
--- a/python/sglang/srt/models/gemma.py
+++ b/python/sglang/srt/models/gemma.py
@@ -12,21 +12,17 @@
 from vllm.model_executor.input_metadata import InputMetadata
 from vllm.model_executor.layers.activation import GeluAndMul
 from vllm.model_executor.layers.layernorm import RMSNorm
-from vllm.model_executor.layers.linear import (
-    LinearMethodBase,
-    MergedColumnParallelLinear,
-    QKVParallelLinear,
-    RowParallelLinear,
-)
+from vllm.model_executor.layers.linear import (LinearMethodBase,
+                                               MergedColumnParallelLinear,
+                                               QKVParallelLinear,
+                                               RowParallelLinear)
 from vllm.model_executor.layers.rotary_embedding import get_rope
-from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
-from vllm.model_executor.parallel_utils.parallel_state import (
-    get_tensor_model_parallel_world_size,
-)
-from vllm.model_executor.weight_utils import (
-    default_weight_loader,
-    hf_model_weights_iterator,
-)
+from vllm.model_executor.layers.vocab_parallel_embedding import \
+    VocabParallelEmbedding
+from vllm.model_executor.parallel_utils.parallel_state import \
+    get_tensor_model_parallel_world_size
+from vllm.model_executor.weight_utils import (default_weight_loader,
+                                              hf_model_weights_iterator)
 
 
 class GemmaMLP(nn.Module):
diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama2.py
index e5c28fa127..559b598c67 100644
--- a/python/sglang/srt/models/llama2.py
+++ b/python/sglang/srt/models/llama2.py
@@ -1,7 +1,7 @@
 # Adapted from
 # https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
 """Inference-only LLaMA model compatible with HuggingFace weights."""
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, Optional, Tuple
 
 import torch
 from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -11,24 +11,17 @@
 from transformers import LlamaConfig
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.layernorm import RMSNorm
-from vllm.model_executor.layers.linear import (
-    LinearMethodBase,
-    MergedColumnParallelLinear,
-    QKVParallelLinear,
-    RowParallelLinear,
-)
+from vllm.model_executor.layers.linear import (LinearMethodBase,
+                                               MergedColumnParallelLinear,
+                                               QKVParallelLinear,
+                                               RowParallelLinear)
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
-    ParallelLMHead,
-    VocabParallelEmbedding,
-)
-from vllm.model_executor.parallel_utils.parallel_state import (
-    get_tensor_model_parallel_world_size,
-)
-from vllm.model_executor.weight_utils import (
-    default_weight_loader,
-    hf_model_weights_iterator,
-)
+    ParallelLMHead, VocabParallelEmbedding)
+from vllm.model_executor.parallel_utils.parallel_state import \
+    get_tensor_model_parallel_world_size
+from vllm.model_executor.weight_utils import (default_weight_loader,
+                                              hf_model_weights_iterator)
 
 
 class LlamaMLP(nn.Module):
diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py
index 8e42d48c79..4bd6991848 100644
--- a/python/sglang/srt/models/llava.py
+++ b/python/sglang/srt/models/llava.py
@@ -6,20 +6,15 @@
 import torch
 from sglang.srt.managers.router.infer_batch import ForwardMode
 from sglang.srt.managers.router.model_runner import InputMetadata
-from sglang.srt.mm_utils import (
-    get_anyres_image_grid_shape,
-    unpad_image,
-    unpad_image_shape,
-)
+from sglang.srt.mm_utils import (get_anyres_image_grid_shape, unpad_image,
+                                 unpad_image_shape)
 from sglang.srt.models.llama2 import LlamaForCausalLM
 from torch import nn
-from transformers import CLIPVisionModel, LlamaConfig, LlavaConfig
+from transformers import CLIPVisionModel, LlavaConfig
 from transformers.models.llava.modeling_llava import LlavaMultiModalProjector
 from vllm.model_executor.layers.linear import LinearMethodBase
-from vllm.model_executor.weight_utils import (
-    default_weight_loader,
-    hf_model_weights_iterator,
-)
+from vllm.model_executor.weight_utils import (default_weight_loader,
+                                              hf_model_weights_iterator)
 
 
 class LlavaLlamaForCausalLM(nn.Module):
diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py
index 01a830807f..82b54193c1 100644
--- a/python/sglang/srt/models/mixtral.py
+++ b/python/sglang/srt/models/mixtral.py
@@ -1,7 +1,7 @@
 # Adapted from
 # https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
 """Inference-only Mixtral model."""
-from typing import List, Optional, Tuple
+from typing import Optional
 
 import numpy as np
 import torch
@@ -12,28 +12,19 @@
 from torch import nn
 from transformers import MixtralConfig
 from vllm.model_executor.layers.layernorm import RMSNorm
-from vllm.model_executor.layers.linear import (
-    LinearMethodBase,
-    QKVParallelLinear,
-    ReplicatedLinear,
-    RowParallelLinear,
-)
+from vllm.model_executor.layers.linear import (LinearMethodBase,
+                                               QKVParallelLinear,
+                                               ReplicatedLinear,
+                                               RowParallelLinear)
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
-    ParallelLMHead,
-    VocabParallelEmbedding,
-)
-from vllm.model_executor.parallel_utils.communication_op import (
-    tensor_model_parallel_all_reduce,
-)
+    ParallelLMHead, VocabParallelEmbedding)
+from vllm.model_executor.parallel_utils.communication_op import \
+    tensor_model_parallel_all_reduce
 from vllm.model_executor.parallel_utils.parallel_state import (
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
-)
-from vllm.model_executor.weight_utils import (
-    default_weight_loader,
-    hf_model_weights_iterator,
-)
+    get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size)
+from vllm.model_executor.weight_utils import (default_weight_loader,
+                                              hf_model_weights_iterator)
 
 
 class MixtralMLP(nn.Module):
diff --git a/python/sglang/srt/models/qwen.py b/python/sglang/srt/models/qwen.py
index 111ad704b2..87cadc3109 100644
--- a/python/sglang/srt/models/qwen.py
+++ b/python/sglang/srt/models/qwen.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, Optional
 
 import torch
 from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -8,24 +8,17 @@
 from transformers import PretrainedConfig
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.layernorm import RMSNorm
-from vllm.model_executor.layers.linear import (
-    LinearMethodBase,
-    MergedColumnParallelLinear,
-    QKVParallelLinear,
-    RowParallelLinear,
-)
+from vllm.model_executor.layers.linear import (LinearMethodBase,
+                                               MergedColumnParallelLinear,
+                                               QKVParallelLinear,
+                                               RowParallelLinear)
 from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import (
-    ParallelLMHead,
-    VocabParallelEmbedding,
-)
-from vllm.model_executor.parallel_utils.parallel_state import (
-    get_tensor_model_parallel_world_size,
-)
-from vllm.model_executor.weight_utils import (
-    default_weight_loader,
-    hf_model_weights_iterator,
-)
+    ParallelLMHead, VocabParallelEmbedding)
+from vllm.model_executor.parallel_utils.parallel_state import \
+    get_tensor_model_parallel_world_size
+from vllm.model_executor.weight_utils import (default_weight_loader,
+                                              hf_model_weights_iterator)
 
 
 class QWenMLP(nn.Module):
diff --git a/python/sglang/srt/models/qwen2.py b/python/sglang/srt/models/qwen2.py
index 26f0a5ae19..559f21e248 100644
--- a/python/sglang/srt/models/qwen2.py
+++ b/python/sglang/srt/models/qwen2.py
@@ -1,7 +1,7 @@
 # Adapted from llama2.py
 # Modify details for the adaptation of Qwen2 model.
"""Inference-only Qwen2 model compatible with HuggingFace weights.""" -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Optional, Tuple import torch from sglang.srt.layers.logits_processor import LogitsProcessor @@ -10,24 +10,17 @@ from torch import nn from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.layernorm import RMSNorm -from vllm.model_executor.layers.linear import ( - LinearMethodBase, - MergedColumnParallelLinear, - QKVParallelLinear, - RowParallelLinear, -) +from vllm.model_executor.layers.linear import (LinearMethodBase, + MergedColumnParallelLinear, + QKVParallelLinear, + RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( - ParallelLMHead, - VocabParallelEmbedding, -) -from vllm.model_executor.parallel_utils.parallel_state import ( - get_tensor_model_parallel_world_size, -) -from vllm.model_executor.weight_utils import ( - default_weight_loader, - hf_model_weights_iterator, -) + ParallelLMHead, VocabParallelEmbedding) +from vllm.model_executor.parallel_utils.parallel_state import \ + get_tensor_model_parallel_world_size +from vllm.model_executor.weight_utils import (default_weight_loader, + hf_model_weights_iterator) Qwen2Config = None diff --git a/python/sglang/srt/models/stablelm.py b/python/sglang/srt/models/stablelm.py index 5eea538969..678b074d34 100644 --- a/python/sglang/srt/models/stablelm.py +++ b/python/sglang/srt/models/stablelm.py @@ -5,31 +5,23 @@ from typing import Optional, Tuple import torch -from torch import nn -from transformers import PretrainedConfig - from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.managers.router.model_runner import InputMetadata +from torch import nn +from transformers import PretrainedConfig from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.linear import ( - LinearMethodBase, - MergedColumnParallelLinear, - QKVParallelLinear, - RowParallelLinear, -) +from vllm.model_executor.layers.linear import (LinearMethodBase, + MergedColumnParallelLinear, + QKVParallelLinear, + RowParallelLinear) from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( - VocabParallelEmbedding, - ParallelLMHead, -) -from vllm.model_executor.parallel_utils.parallel_state import ( - get_tensor_model_parallel_world_size, -) -from vllm.model_executor.weight_utils import ( - default_weight_loader, - hf_model_weights_iterator, -) + ParallelLMHead, VocabParallelEmbedding) +from vllm.model_executor.parallel_utils.parallel_state import \ + get_tensor_model_parallel_world_size +from vllm.model_executor.weight_utils import (default_weight_loader, + hf_model_weights_iterator) class StablelmMLP(nn.Module): diff --git a/python/sglang/srt/models/yivl.py b/python/sglang/srt/models/yivl.py index 014e40c6e6..d3c2fe4fe8 100644 --- a/python/sglang/srt/models/yivl.py +++ b/python/sglang/srt/models/yivl.py @@ -1,20 +1,14 @@ """Inference-only Yi-VL model.""" -import os -from typing import List, Optional +from typing import Optional import torch import torch.nn as nn -from sglang.srt.models.llava import ( - LlavaLlamaForCausalLM, - clip_vision_embed_forward, - monkey_path_clip_vision_embed_forward, -) +from sglang.srt.models.llava import (LlavaLlamaForCausalLM, + monkey_path_clip_vision_embed_forward) from 
transformers import CLIPVisionModel, LlavaConfig -from vllm.model_executor.weight_utils import ( - default_weight_loader, - hf_model_weights_iterator, -) +from vllm.model_executor.weight_utils import (default_weight_loader, + hf_model_weights_iterator) class YiVLForCausalLM(LlavaLlamaForCausalLM): diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index e9961305d6..d6d0ac3713 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -24,32 +24,19 @@ from pydantic import BaseModel from sglang.backend.runtime_endpoint import RuntimeEndpoint from sglang.srt.constrained import disable_cache -from sglang.srt.conversation import ( - Conversation, - SeparatorStyle, - chat_template_exists, - generate_chat_conv, - register_conv_template, -) +from sglang.srt.conversation import (Conversation, SeparatorStyle, + chat_template_exists, generate_chat_conv, + register_conv_template) from sglang.srt.hf_transformers_utils import get_tokenizer from sglang.srt.managers.detokenizer_manager import start_detokenizer_process from sglang.srt.managers.io_struct import DetokenizeReqInput, GenerateReqInput from sglang.srt.managers.openai_protocol import ( - ChatCompletionRequest, - ChatCompletionResponse, - ChatCompletionResponseChoice, - ChatCompletionResponseStreamChoice, - ChatCompletionStreamResponse, - ChatMessage, - CompletionRequest, - CompletionResponse, - CompletionResponseChoice, - CompletionResponseStreamChoice, - CompletionStreamResponse, - DeltaMessage, - LogProbs, - UsageInfo, -) + ChatCompletionRequest, ChatCompletionResponse, + ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice, + ChatCompletionStreamResponse, ChatMessage, CompletionRequest, + CompletionResponse, CompletionResponseChoice, + CompletionResponseStreamChoice, CompletionStreamResponse, DeltaMessage, + LogProbs, UsageInfo) from sglang.srt.managers.router.manager import start_router_process from sglang.srt.managers.tokenizer_manager import TokenizerManager from sglang.srt.server_args import PortArgs, ServerArgs @@ -527,7 +514,7 @@ def _wait_and_warmup(): try: requests.get(url + "/get_model_info", timeout=5, headers=headers) break - except requests.exceptions.RequestException as e: + except requests.exceptions.RequestException: pass else: if pipe_finish_writer is not None: diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index 86680c3bbf..ecb5831c88 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -149,7 +149,6 @@ def get_exception_traceback(): def get_int_token_logit_bias(tokenizer, vocab_size): - from transformers import LlamaTokenizer, LlamaTokenizerFast # a bug when model's vocab size > tokenizer.vocab_size vocab_size = tokenizer.vocab_size diff --git a/python/sglang/test/test_conversation.py b/python/sglang/test/test_conversation.py index 11e837ddbd..d828060521 100644 --- a/python/sglang/test/test_conversation.py +++ b/python/sglang/test/test_conversation.py @@ -1,12 +1,9 @@ from sglang.srt.conversation import generate_chat_conv from sglang.srt.managers.openai_protocol import ( ChatCompletionMessageContentImagePart, - ChatCompletionMessageContentImageURL, - ChatCompletionMessageContentTextPart, - ChatCompletionMessageGenericParam, - ChatCompletionMessageUserParam, - ChatCompletionRequest, -) + ChatCompletionMessageContentImageURL, ChatCompletionMessageContentTextPart, + ChatCompletionMessageGenericParam, ChatCompletionMessageUserParam, + ChatCompletionRequest) def test_chat_completion_to_conv_image(): diff --git 
a/python/sglang/test/test_openai_protocol.py b/python/sglang/test/test_openai_protocol.py index 99e7a8089c..72b2b7c1b1 100644 --- a/python/sglang/test/test_openai_protocol.py +++ b/python/sglang/test/test_openai_protocol.py @@ -1,11 +1,8 @@ from sglang.srt.managers.openai_protocol import ( ChatCompletionMessageContentImagePart, - ChatCompletionMessageContentImageURL, - ChatCompletionMessageContentTextPart, - ChatCompletionMessageGenericParam, - ChatCompletionMessageUserParam, - ChatCompletionRequest, -) + ChatCompletionMessageContentImageURL, ChatCompletionMessageContentTextPart, + ChatCompletionMessageGenericParam, ChatCompletionMessageUserParam, + ChatCompletionRequest) def test_chat_completion_request_image(): diff --git a/scripts/format.sh b/scripts/format.sh index 104db69bf5..20b522e46e 100644 --- a/scripts/format.sh +++ b/scripts/format.sh @@ -1,5 +1,5 @@ isort python -black python +ruff python isort test -black test +ruff test diff --git a/test/lang/run_all.py b/test/lang/run_all.py index cb5da15850..75d5d5c2b3 100644 --- a/test/lang/run_all.py +++ b/test/lang/run_all.py @@ -1,7 +1,6 @@ import argparse import glob import multiprocessing -import os import time import unittest diff --git a/test/lang/test_anthropic_backend.py b/test/lang/test_anthropic_backend.py index b0da888381..a693c72b51 100644 --- a/test/lang/test_anthropic_backend.py +++ b/test/lang/test_anthropic_backend.py @@ -1,4 +1,3 @@ -import json import unittest from sglang.test.test_programs import test_mt_bench, test_stream diff --git a/test/lang/test_openai_backend.py b/test/lang/test_openai_backend.py index 236c548a82..e1276860f7 100644 --- a/test/lang/test_openai_backend.py +++ b/test/lang/test_openai_backend.py @@ -1,19 +1,11 @@ import unittest -from sglang.test.test_programs import ( - test_decode_int, - test_decode_json, - test_expert_answer, - test_few_shot_qa, - test_image_qa, - test_mt_bench, - test_parallel_decoding, - test_parallel_encoding, - test_react, - test_select, - test_stream, - test_tool_use, -) +from sglang.test.test_programs import (test_decode_int, test_decode_json, + test_expert_answer, test_few_shot_qa, + test_image_qa, test_mt_bench, + test_parallel_decoding, + test_parallel_encoding, test_react, + test_select, test_stream, test_tool_use) from sglang import OpenAI, set_default_backend diff --git a/test/lang/test_srt_backend.py b/test/lang/test_srt_backend.py index 82a9f1ad46..90ea028554 100644 --- a/test/lang/test_srt_backend.py +++ b/test/lang/test_srt_backend.py @@ -2,23 +2,13 @@ python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 """ -import json import unittest -from sglang.test.test_programs import ( - test_decode_int, - test_decode_json_regex, - test_expert_answer, - test_few_shot_qa, - test_mt_bench, - test_parallel_decoding, - test_parallel_encoding, - test_react, - test_regex, - test_select, - test_stream, - test_tool_use, -) +from sglang.test.test_programs import (test_decode_int, test_decode_json_regex, + test_expert_answer, test_few_shot_qa, + test_mt_bench, test_parallel_decoding, + test_regex, test_select, test_stream, + test_tool_use) import sglang as sgl diff --git a/test/lang/test_tracing.py b/test/lang/test_tracing.py index cdc9000d89..11a3e61809 100644 --- a/test/lang/test_tracing.py +++ b/test/lang/test_tracing.py @@ -111,7 +111,7 @@ def tip_suggestion(s): forks = s.fork(3) for i in range(3): forks[i] += f"Now, expand tip {i+1} into a paragraph:\n" - forks[i] += sgl.gen(f"detailed_tip") + forks[i] += sgl.gen("detailed_tip") s += "Tip 1:" + 
forks[0]["detailed_tip"] + "\n" s += "Tip 2:" + forks[1]["detailed_tip"] + "\n" diff --git a/test/lang/test_vertexai_backend.py b/test/lang/test_vertexai_backend.py index a17ab4ba74..fa6af1c38a 100644 --- a/test/lang/test_vertexai_backend.py +++ b/test/lang/test_vertexai_backend.py @@ -1,14 +1,9 @@ import unittest -from sglang.test.test_programs import ( - test_expert_answer, - test_few_shot_qa, - test_image_qa, - test_mt_bench, - test_parallel_decoding, - test_parallel_encoding, - test_stream, -) +from sglang.test.test_programs import (test_expert_answer, test_few_shot_qa, + test_image_qa, test_mt_bench, + test_parallel_decoding, + test_parallel_encoding, test_stream) from sglang import VertexAI, set_default_backend diff --git a/test/srt/model/reference_hf.py b/test/srt/model/reference_hf.py index e63866f026..4060f9212c 100644 --- a/test/srt/model/reference_hf.py +++ b/test/srt/model/reference_hf.py @@ -1,5 +1,4 @@ import argparse -import os import torch from transformers import AutoModelForCausalLM, AutoTokenizer diff --git a/test/srt/model/test_llama_extend.py b/test/srt/model/test_llama_extend.py index 2931dfa5dc..8b0a1371a4 100644 --- a/test/srt/model/test_llama_extend.py +++ b/test/srt/model/test_llama_extend.py @@ -1,10 +1,6 @@ import multiprocessing import os -import time -import numpy as np -import torch -import torch.distributed as dist import transformers from sglang.srt.managers.router.infer_batch import Batch, ForwardMode, Req from sglang.srt.managers.router.model_runner import ModelRunner diff --git a/test/srt/model/test_llava_low_api.py b/test/srt/model/test_llava_low_api.py index 322ba4855f..fb6f6abc6b 100644 --- a/test/srt/model/test_llava_low_api.py +++ b/test/srt/model/test_llava_low_api.py @@ -1,12 +1,9 @@ import multiprocessing -import time import numpy as np import torch -import torch.distributed as dist from sglang.srt.hf_transformers_utils import get_processor -from sglang.srt.managers.router.infer_batch import ForwardMode -from sglang.srt.managers.router.model_runner import InputMetadata, ModelRunner +from sglang.srt.managers.router.model_runner import ModelRunner from sglang.srt.model_config import ModelConfig from sglang.srt.utils import load_image diff --git a/test/srt/test_httpserver_concurrent.py b/test/srt/test_httpserver_concurrent.py index 855e51f33d..6cdd5332dd 100644 --- a/test/srt/test_httpserver_concurrent.py +++ b/test/srt/test_httpserver_concurrent.py @@ -9,11 +9,8 @@ import argparse import asyncio -import json -import time import aiohttp -import requests async def send_request(url, data, delay=0): diff --git a/test/srt/test_httpserver_llava.py b/test/srt/test_httpserver_llava.py index 0f6571b450..6db4ab9303 100644 --- a/test/srt/test_httpserver_llava.py +++ b/test/srt/test_httpserver_llava.py @@ -10,7 +10,6 @@ import argparse import asyncio import json -import time import aiohttp import requests diff --git a/test/srt/test_httpserver_reuse.py b/test/srt/test_httpserver_reuse.py index c3f7865899..ef866afc6b 100644 --- a/test/srt/test_httpserver_reuse.py +++ b/test/srt/test_httpserver_reuse.py @@ -6,7 +6,6 @@ """ import argparse -import time import requests diff --git a/test/srt/test_jump_forward.py b/test/srt/test_jump_forward.py index 15ec2caffd..832409be53 100644 --- a/test/srt/test_jump_forward.py +++ b/test/srt/test_jump_forward.py @@ -3,10 +3,8 @@ from pydantic import BaseModel, constr from sglang.srt.constrained import build_regex_from_object -from sglang.test.test_utils import ( - add_common_sglang_args_and_parse, - select_sglang_backend, -) 
+from sglang.test.test_utils import (add_common_sglang_args_and_parse,
+                                    select_sglang_backend)
 
 import sglang as sgl
 
diff --git a/test/srt/test_robust.py b/test/srt/test_robust.py
index 5b479318f5..81c66fbcc8 100644
--- a/test/srt/test_robust.py
+++ b/test/srt/test_robust.py
@@ -2,10 +2,8 @@
 import random
 import string
 
-from sglang.test.test_utils import (
-    add_common_sglang_args_and_parse,
-    select_sglang_backend,
-)
+from sglang.test.test_utils import (add_common_sglang_args_and_parse,
+                                    select_sglang_backend)
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
 import sglang as sgl