diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 04eb1ecc304..1e09712abb7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,9 +27,9 @@ repos:
     rev: v0.11.7
     hooks:
       - id: ruff
-        args: [--select=F401, --fixable=F401]
-        files: ^(benchmark/|docs/|examples/)
-        exclude: \.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
+        args: [--select=F401,F821, --fixable=F401]
+        files: ^(benchmark/|docs/|examples/|python/sglang/)
+        exclude: __init__\.py$|\.ipynb$|^python/sglang/srt/grpc/.*_pb2\.py$|^python/sglang/srt/grpc/.*_pb2_grpc\.py$|^python/sglang/srt/grpc/.*_pb2\.pyi$|^python/sglang/srt/grpc/.*_pb2_grpc\.pyi$
   - repo: https://github.com/psf/black
     rev: 24.10.0
     hooks:
diff --git a/python/sglang/srt/_custom_ops.py b/python/sglang/srt/_custom_ops.py
index 5ed175312c9..de47707c18a 100644
--- a/python/sglang/srt/_custom_ops.py
+++ b/python/sglang/srt/_custom_ops.py
@@ -15,7 +15,7 @@
 # ROCm does not use vllm custom allreduce
 if use_vllm_custom_allreduce and not is_hip():
     try:
-        import vllm._C
+        import vllm._C  # noqa: F401
     except ImportError as e:
         logger.warning("Failed to import from vllm._C with %r", e)
 else:
diff --git a/python/sglang/srt/compilation/cuda_piecewise_backend.py b/python/sglang/srt/compilation/cuda_piecewise_backend.py
index 9f4b8cc8e8a..44e3803ff5d 100644
--- a/python/sglang/srt/compilation/cuda_piecewise_backend.py
+++ b/python/sglang/srt/compilation/cuda_piecewise_backend.py
@@ -9,7 +9,6 @@
 import torch
 import torch.fx as fx
 
-import sglang.srt.compilation.weak_ref_tensor_jit
 from sglang.srt.compilation.compilation_config import CompilationConfig
 from sglang.srt.compilation.compilation_counter import compilation_counter
diff --git a/python/sglang/srt/configs/deepseekvl2.py b/python/sglang/srt/configs/deepseekvl2.py
index bcb0afe5ae7..9621f058bf6 100644
--- a/python/sglang/srt/configs/deepseekvl2.py
+++ b/python/sglang/srt/configs/deepseekvl2.py
@@ -1,5 +1,4 @@
 import math
-import os
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple
diff --git a/python/sglang/srt/configs/dots_vlm.py b/python/sglang/srt/configs/dots_vlm.py
index 155d6ee47c1..dc921582ccf 100644
--- a/python/sglang/srt/configs/dots_vlm.py
+++ b/python/sglang/srt/configs/dots_vlm.py
@@ -1,10 +1,5 @@
-from typing import Any, List, Optional, Union
-
-from transformers import AutoProcessor, LlamaTokenizerFast, PretrainedConfig
-from transformers.feature_extraction_utils import BatchFeature
-from transformers.image_utils import ImageInput
-from transformers.processing_utils import ProcessingKwargs, Unpack
-from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
+from transformers import AutoProcessor, PretrainedConfig
+from transformers.processing_utils import ProcessingKwargs
 
 try:
     from transformers import Qwen2_5_VLProcessor
diff --git a/python/sglang/srt/configs/falcon_h1.py b/python/sglang/srt/configs/falcon_h1.py
index d323b056db2..b8869b4ffa3 100644
--- a/python/sglang/srt/configs/falcon_h1.py
+++ b/python/sglang/srt/configs/falcon_h1.py
@@ -14,17 +14,12 @@
 # limitations under the License.
"""Falcon-H1 model configuration""" -import enum from transformers.configuration_utils import PretrainedConfig -from transformers.modeling_rope_utils import rope_config_validation from transformers.utils import logging from sglang.srt.configs.mamba_utils import Mamba2CacheParams, Mamba2StateShape -from sglang.srt.layers.dp_attention import ( - get_attention_tp_size, - get_tensor_model_parallel_world_size, -) +from sglang.srt.layers.dp_attention import get_tensor_model_parallel_world_size logger = logging.get_logger(__name__) diff --git a/python/sglang/srt/configs/qwen3_next.py b/python/sglang/srt/configs/qwen3_next.py index 09c9b5a1b3e..630227a2c62 100644 --- a/python/sglang/srt/configs/qwen3_next.py +++ b/python/sglang/srt/configs/qwen3_next.py @@ -21,7 +21,6 @@ from transformers.utils import logging from sglang.srt.configs.mamba_utils import Mamba2CacheParams, Mamba2StateShape -from sglang.srt.distributed.utils import divide from sglang.srt.layers.dp_attention import get_attention_tp_size logger = logging.get_logger(__name__) diff --git a/python/sglang/srt/connector/remote_instance.py b/python/sglang/srt/connector/remote_instance.py index e1f00037f8c..0a4e67cfd2f 100644 --- a/python/sglang/srt/connector/remote_instance.py +++ b/python/sglang/srt/connector/remote_instance.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 import logging -from typing import Generator, List, Optional, Tuple +from typing import Generator, Optional, Tuple from urllib.parse import urlparse import torch diff --git a/python/sglang/srt/disaggregation/ascend/transfer_engine.py b/python/sglang/srt/disaggregation/ascend/transfer_engine.py index a1fe58ce605..a701838b6a6 100644 --- a/python/sglang/srt/disaggregation/ascend/transfer_engine.py +++ b/python/sglang/srt/disaggregation/ascend/transfer_engine.py @@ -1,6 +1,6 @@ import logging import os -from typing import List, Optional +from typing import List import torch diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py index 45589ec51fb..5e05cdd7408 100644 --- a/python/sglang/srt/disaggregation/decode.py +++ b/python/sglang/srt/disaggregation/decode.py @@ -25,7 +25,7 @@ from collections import deque from dataclasses import dataclass from http import HTTPStatus -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union +from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union import torch from torch.distributed import ProcessGroup @@ -48,10 +48,7 @@ ) from sglang.srt.layers.dp_attention import get_attention_tp_size from sglang.srt.managers.schedule_batch import FINISH_ABORT, RequestStage, ScheduleBatch -from sglang.srt.mem_cache.allocator import ( - BaseTokenToKVPoolAllocator, - SWATokenToKVPoolAllocator, -) +from sglang.srt.mem_cache.allocator import BaseTokenToKVPoolAllocator from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache from sglang.srt.mem_cache.memory_pool import ( HybridLinearKVPool, @@ -61,7 +58,6 @@ ReqToTokenPool, SWAKVPool, ) -from sglang.srt.model_executor.forward_batch_info import ForwardMode from sglang.srt.utils import get_int_env_var, require_mlp_sync from sglang.srt.utils.torch_memory_saver_adapter import TorchMemorySaverAdapter diff --git a/python/sglang/srt/disaggregation/prefill.py b/python/sglang/srt/disaggregation/prefill.py index 23cd0dd1754..86ef0498fc5 100644 --- a/python/sglang/srt/disaggregation/prefill.py +++ b/python/sglang/srt/disaggregation/prefill.py @@ -20,7 +20,6 @@ from __future__ import annotations import logging -import threading 
 import time
 from collections import deque
 from http import HTTPStatus
@@ -54,7 +53,7 @@
     NSATokenToKVPool,
     SWAKVPool,
 )
-from sglang.srt.model_executor.forward_batch_info import ForwardMode, PPProxyTensors
+from sglang.srt.model_executor.forward_batch_info import PPProxyTensors
 from sglang.srt.utils import (
     DynamicGradMode,
     broadcast_pyobj,
diff --git a/python/sglang/srt/distributed/device_communicators/custom_all_reduce.py b/python/sglang/srt/distributed/device_communicators/custom_all_reduce.py
index bb7128206a5..72668bf2e26 100644
--- a/python/sglang/srt/distributed/device_communicators/custom_all_reduce.py
+++ b/python/sglang/srt/distributed/device_communicators/custom_all_reduce.py
@@ -32,7 +32,7 @@
         ops.meta_size()
     else:
         # Use custom allreduce from sgl kernel (ROCM and TRT-LLM)
-        import sgl_kernel
+        import sgl_kernel  # noqa: F401
 
     custom_ar = True
 except Exception:
     # For CPUs
diff --git a/python/sglang/srt/distributed/device_communicators/pymscclpp.py b/python/sglang/srt/distributed/device_communicators/pymscclpp.py
index 78269ed05a3..5d7511c2c2a 100644
--- a/python/sglang/srt/distributed/device_communicators/pymscclpp.py
+++ b/python/sglang/srt/distributed/device_communicators/pymscclpp.py
@@ -4,7 +4,7 @@
 import os
 from contextlib import contextmanager
 from enum import IntEnum
-from typing import Any, Callable, List, Optional, TypeVar, Union
+from typing import Optional, Union
 
 import torch
 import torch.distributed as dist
@@ -24,7 +24,7 @@
 mscclpp_is_available = False
 if _is_cuda:
     try:
-        import sgl_kernel
+        import sgl_kernel  # noqa: F401
 
         mscclpp_is_available = True
     except:
diff --git a/python/sglang/srt/distributed/device_communicators/symm_mem.py b/python/sglang/srt/distributed/device_communicators/symm_mem.py
index 0d69a33a28f..48e20627e90 100644
--- a/python/sglang/srt/distributed/device_communicators/symm_mem.py
+++ b/python/sglang/srt/distributed/device_communicators/symm_mem.py
@@ -9,7 +9,7 @@
 from sglang.srt.distributed.device_communicators.all_reduce_utils import (
     SYMM_MEM_ALL_REDUCE_MAX_SIZES,
 )
-from sglang.srt.utils import get_device_capability, is_cuda, is_hip
+from sglang.srt.utils import is_cuda, is_hip
 
 try:
     import torch.distributed._symmetric_memory as torch_symm_mem
diff --git a/python/sglang/srt/distributed/naive_distributed.py b/python/sglang/srt/distributed/naive_distributed.py
index 61165d90c05..b340ff44d6e 100644
--- a/python/sglang/srt/distributed/naive_distributed.py
+++ b/python/sglang/srt/distributed/naive_distributed.py
@@ -1,5 +1,4 @@
 import base64
-import os
 import pickle
 import time
 from pathlib import Path
diff --git a/python/sglang/srt/entrypoints/context.py b/python/sglang/srt/entrypoints/context.py
index 9314083b4c1..972c0f4f3ca 100644
--- a/python/sglang/srt/entrypoints/context.py
+++ b/python/sglang/srt/entrypoints/context.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # Copied from vLLM
-import json
 import logging
 from abc import ABC, abstractmethod
 from typing import Union
diff --git a/python/sglang/srt/entrypoints/harmony_utils.py b/python/sglang/srt/entrypoints/harmony_utils.py
index ad6350d165f..68bbbf09467 100644
--- a/python/sglang/srt/entrypoints/harmony_utils.py
+++ b/python/sglang/srt/entrypoints/harmony_utils.py
@@ -3,7 +3,6 @@
 # Adapted from vLLM: https://github.com/vllm-project/vllm/blob/1b9902806915040ac9b3029f2ab7522ec505afc3/vllm/entrypoints/harmony_utils.py
 # Slight differences in processing chat messages
 import datetime
-import json
 from collections.abc import Iterable
 from typing import Literal, Optional, Union
diff --git a/python/sglang/srt/entrypoints/http_server.py b/python/sglang/srt/entrypoints/http_server.py
index 335be026d09..00fe4ca17e7 100644
--- a/python/sglang/srt/entrypoints/http_server.py
+++ b/python/sglang/srt/entrypoints/http_server.py
@@ -19,7 +19,6 @@
 
 import asyncio
 import dataclasses
-import json
 import logging
 import multiprocessing as multiprocessing
 import os
diff --git a/python/sglang/srt/entrypoints/http_server_engine.py b/python/sglang/srt/entrypoints/http_server_engine.py
index d1db80d656f..9ab665a05a7 100644
--- a/python/sglang/srt/entrypoints/http_server_engine.py
+++ b/python/sglang/srt/entrypoints/http_server_engine.py
@@ -1,15 +1,9 @@
-import copy
-import dataclasses
 import multiprocessing
-import pickle
-import threading
 import time
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import List, Optional, Tuple
 
-import pybase64
 import requests
 import torch
-import torch.distributed as dist
 
 from sglang.srt.entrypoints.EngineBase import EngineBase
 from sglang.srt.entrypoints.http_server import launch_server
diff --git a/python/sglang/srt/eplb/eplb_algorithms/deepseek.py b/python/sglang/srt/eplb/eplb_algorithms/deepseek.py
index 180ccdee452..34bbc491027 100644
--- a/python/sglang/srt/eplb/eplb_algorithms/deepseek.py
+++ b/python/sglang/srt/eplb/eplb_algorithms/deepseek.py
@@ -3,8 +3,6 @@
 
 import torch
 
-from sglang.srt.utils import get_bool_env_var
-
 
 def balanced_packing(
     weight: torch.Tensor, num_packs: int
diff --git a/python/sglang/srt/function_call/glm4_moe_detector.py b/python/sglang/srt/function_call/glm4_moe_detector.py
index 845b5d41fd6..301d0e0dedc 100644
--- a/python/sglang/srt/function_call/glm4_moe_detector.py
+++ b/python/sglang/srt/function_call/glm4_moe_detector.py
@@ -6,11 +6,7 @@
 
 from sglang.srt.entrypoints.openai.protocol import Tool
 from sglang.srt.function_call.base_format_detector import BaseFormatDetector
-from sglang.srt.function_call.core_types import (
-    StreamingParseResult,
-    StructureInfo,
-    _GetInfoFunc,
-)
+from sglang.srt.function_call.core_types import StreamingParseResult, _GetInfoFunc
 from sglang.srt.function_call.ebnf_composer import EBNFComposer
 
 logger = logging.getLogger(__name__)
diff --git a/python/sglang/srt/function_call/json_array_parser.py b/python/sglang/srt/function_call/json_array_parser.py
index 5144cb83b7d..6d6bffc996c 100644
--- a/python/sglang/srt/function_call/json_array_parser.py
+++ b/python/sglang/srt/function_call/json_array_parser.py
@@ -1,5 +1,3 @@
-import json
-import re
 from typing import List
 
 from sglang.srt.entrypoints.openai.protocol import Tool
diff --git a/python/sglang/srt/function_call/utils.py b/python/sglang/srt/function_call/utils.py
index 5ad3f6e89a0..d85e5e6c030 100644
--- a/python/sglang/srt/function_call/utils.py
+++ b/python/sglang/srt/function_call/utils.py
@@ -1,4 +1,3 @@
-import json
 from json import JSONDecodeError, JSONDecoder
 from json.decoder import WHITESPACE
 from typing import Any, List, Literal, Optional, Tuple, Union
diff --git a/python/sglang/srt/grpc/compile_proto.py b/python/sglang/srt/grpc/compile_proto.py
index 7aa145075c9..c2c4c0aa64f 100755
--- a/python/sglang/srt/grpc/compile_proto.py
+++ b/python/sglang/srt/grpc/compile_proto.py
@@ -70,7 +70,7 @@ def compile_proto(proto_file: Path, output_dir: Path, verbose: bool = True) -> b
 
     # Check if grpc_tools is available
     try:
-        import grpc_tools.protoc
+        import grpc_tools.protoc  # noqa: F401
     except ImportError:
         print("Error: grpcio-tools not installed")
         print(
diff --git a/python/sglang/srt/grpc/grpc_request_manager.py b/python/sglang/srt/grpc/grpc_request_manager.py
index a8acb4bc411..81845388b02 100644
--- a/python/sglang/srt/grpc/grpc_request_manager.py
+++ b/python/sglang/srt/grpc/grpc_request_manager.py
@@ -27,7 +27,6 @@
     TokenizedEmbeddingReqInput,
     TokenizedGenerateReqInput,
 )
-from sglang.srt.managers.scheduler import is_health_check_generate_req
 from sglang.srt.server_args import PortArgs, ServerArgs
 from sglang.srt.utils import get_zmq_socket, kill_process_tree
 from sglang.utils import get_exception_traceback
diff --git a/python/sglang/srt/layers/activation.py b/python/sglang/srt/layers/activation.py
index 5dc48821adc..f9bb6d6f57d 100644
--- a/python/sglang/srt/layers/activation.py
+++ b/python/sglang/srt/layers/activation.py
@@ -380,4 +380,7 @@ def get_cross_encoder_activation_function(config: PretrainedConfig):
     logger.info(
         "sgl-kernel is not available on Non-NV, Non-AMD platforms or Non-AMX CPUs. Fallback to other kernel libraries."
     )
-    from vllm.model_executor.layers.activation import GeluAndMul, SiluAndMul
+    from vllm.model_executor.layers.activation import (  # noqa: F401
+        GeluAndMul,
+        SiluAndMul,
+    )
diff --git a/python/sglang/srt/layers/attention/ascend_backend.py b/python/sglang/srt/layers/attention/ascend_backend.py
index bc118d6c505..f795c65d0f5 100644
--- a/python/sglang/srt/layers/attention/ascend_backend.py
+++ b/python/sglang/srt/layers/attention/ascend_backend.py
@@ -20,7 +20,6 @@
     from sglang.srt.layers.radix_attention import RadixAttention
     from sglang.srt.model_executor.model_runner import ModelRunner
 
-import os
 
 import numpy as np
diff --git a/python/sglang/srt/layers/attention/base_attn_backend.py b/python/sglang/srt/layers/attention/base_attn_backend.py
index d0ab5ca82b7..dcbf1c8fdf1 100644
--- a/python/sglang/srt/layers/attention/base_attn_backend.py
+++ b/python/sglang/srt/layers/attention/base_attn_backend.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Optional
 
 import torch
 
diff --git a/python/sglang/srt/layers/attention/fla/chunk.py b/python/sglang/srt/layers/attention/fla/chunk.py
index a48a9e649f3..21d93ac0044 100644
--- a/python/sglang/srt/layers/attention/fla/chunk.py
+++ b/python/sglang/srt/layers/attention/fla/chunk.py
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 
-import warnings
 from typing import Optional
 
 import torch
diff --git a/python/sglang/srt/layers/attention/fla/chunk_o.py b/python/sglang/srt/layers/attention/fla/chunk_o.py
index d672c646beb..b2ae826f760 100644
--- a/python/sglang/srt/layers/attention/fla/chunk_o.py
+++ b/python/sglang/srt/layers/attention/fla/chunk_o.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 
-from typing import Optional, Tuple
+from typing import Optional
 
 import torch
 import triton
diff --git a/python/sglang/srt/layers/attention/fla/index.py b/python/sglang/srt/layers/attention/fla/index.py
index 754b9871462..31b2e524e2a 100644
--- a/python/sglang/srt/layers/attention/fla/index.py
+++ b/python/sglang/srt/layers/attention/fla/index.py
@@ -3,9 +3,7 @@
 # Copyright (c) 2023-2025, Songlin Yang, Yu Zhang
 
 import torch
-import torch.nn.functional as F
 import triton
-import triton.language as tl
 
 from sglang.srt.layers.attention.fla.utils import tensor_cache
 
diff --git a/python/sglang/srt/layers/attention/fla/layernorm_gated.py b/python/sglang/srt/layers/attention/fla/layernorm_gated.py
index 50b7244c6e9..b7dd39b1292 100644
--- a/python/sglang/srt/layers/attention/fla/layernorm_gated.py
+++ b/python/sglang/srt/layers/attention/fla/layernorm_gated.py
@@ -5,7 +5,6 @@
 # This backward pass is faster for dimensions up to 8k, but after that it's much slower due to register spilling.
 # The models we train have hidden dim up to 8k anyway (e.g. Llama 70B), so this is fine.
 
-import math
 
 import torch
 import torch.nn.functional as F
diff --git a/python/sglang/srt/layers/attention/fla/wy_fast.py b/python/sglang/srt/layers/attention/fla/wy_fast.py
index d51500eb459..fa39312df21 100644
--- a/python/sglang/srt/layers/attention/fla/wy_fast.py
+++ b/python/sglang/srt/layers/attention/fla/wy_fast.py
@@ -9,8 +9,6 @@
 import triton.language as tl
 
 from sglang.srt.layers.attention.fla.index import prepare_chunk_indices
-from sglang.srt.layers.attention.fla.op import safe_exp
-from sglang.srt.layers.attention.fla.utils import check_shared_mem
 
 
 @triton.heuristics({"IS_VARLEN": lambda args: args["cu_seqlens"] is not None})
diff --git a/python/sglang/srt/layers/attention/flashinfer_backend.py b/python/sglang/srt/layers/attention/flashinfer_backend.py
index ab4398b0b4d..33ff82ca6b2 100644
--- a/python/sglang/srt/layers/attention/flashinfer_backend.py
+++ b/python/sglang/srt/layers/attention/flashinfer_backend.py
@@ -50,7 +50,6 @@
         fast_decode_plan,
     )
     from flashinfer.cascade import merge_state
-    from flashinfer.decode import _get_range_buf, get_seq_lens
 
 
 class WrapperDispatch(Enum):
diff --git a/python/sglang/srt/layers/attention/hybrid_attn_backend.py b/python/sglang/srt/layers/attention/hybrid_attn_backend.py
index 7a78fd4d1c6..4f1439c264a 100644
--- a/python/sglang/srt/layers/attention/hybrid_attn_backend.py
+++ b/python/sglang/srt/layers/attention/hybrid_attn_backend.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union
+from typing import Optional
 
 import torch
 
diff --git a/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py b/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
index 7f2e90255fd..5ea9e6c8e43 100644
--- a/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
+++ b/python/sglang/srt/layers/attention/hybrid_linear_attn_backend.py
@@ -1,9 +1,6 @@
-from dataclasses import astuple, dataclass
-from functools import lru_cache
 from typing import Optional, Union
 
 import torch
-import torch.nn.functional as F
 
 from sglang.srt.layers.attention.base_attn_backend import AttentionBackend
 from sglang.srt.layers.attention.fla.chunk import chunk_gated_delta_rule
diff --git a/python/sglang/srt/layers/attention/intel_amx_backend.py b/python/sglang/srt/layers/attention/intel_amx_backend.py
index 39e5c7428ad..4b2974c44e0 100644
--- a/python/sglang/srt/layers/attention/intel_amx_backend.py
+++ b/python/sglang/srt/layers/attention/intel_amx_backend.py
@@ -14,7 +14,7 @@ class IntelAMXAttnBackend(AttentionBackend):
     def __init__(self, model_runner: ModelRunner):
-        import sgl_kernel
+        import sgl_kernel  # noqa: F401
 
         super().__init__()
         self.forward_metadata = None
diff --git a/python/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py b/python/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py
index dbd9dac347a..88a65ddd0a1 100644
--- a/python/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py
+++ b/python/sglang/srt/layers/attention/mamba/causal_conv1d_triton.py
@@ -4,7 +4,6 @@
 
 from typing import List, Optional, Union
 
-import numpy as np
 import torch
 import triton
 import triton.language as tl
diff --git a/python/sglang/srt/layers/attention/mamba/ops/ssd_combined.py b/python/sglang/srt/layers/attention/mamba/ops/ssd_combined.py
index d27fc562ea7..6e2e74752ba 100644
--- a/python/sglang/srt/layers/attention/mamba/ops/ssd_combined.py
+++ b/python/sglang/srt/layers/attention/mamba/ops/ssd_combined.py
@@ -10,7 +10,6 @@
 
 import torch
 import triton
-import triton.language as tl
 from einops import rearrange
 from packaging import version
 
diff --git a/python/sglang/srt/layers/attention/npu_ops/mla_preprocess.py b/python/sglang/srt/layers/attention/npu_ops/mla_preprocess.py
index 06a55254529..76f802bd291 100644
--- a/python/sglang/srt/layers/attention/npu_ops/mla_preprocess.py
+++ b/python/sglang/srt/layers/attention/npu_ops/mla_preprocess.py
@@ -13,7 +13,7 @@ def is_mla_preprocess_enabled() -> bool:
 
 
 if is_mla_preprocess_enabled():
-    import sgl_kernel_npu
+    import sgl_kernel_npu  # noqa: F401
    import torch_npu
 
     torch.npu.config.allow_internal_format = True
diff --git a/python/sglang/srt/layers/attention/nsa/nsa_indexer.py b/python/sglang/srt/layers/attention/nsa/nsa_indexer.py
index ebb5b85da26..b9f399899e2 100644
--- a/python/sglang/srt/layers/attention/nsa/nsa_indexer.py
+++ b/python/sglang/srt/layers/attention/nsa/nsa_indexer.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Optional
 
 import torch
 import torch.nn.functional as F
@@ -547,7 +547,7 @@ def forward_npu(
         forward_batch: ForwardBatch,
         layer_id: int,
     ) -> torch.Tensor:
-        import custom_ops
+        import custom_ops  # noqa: F401
         import torch_npu
 
         from sglang.srt.layers.dp_attention import (
diff --git a/python/sglang/srt/layers/attention/nsa_backend.py b/python/sglang/srt/layers/attention/nsa_backend.py
index 74d293fd310..6ec4652f415 100644
--- a/python/sglang/srt/layers/attention/nsa_backend.py
+++ b/python/sglang/srt/layers/attention/nsa_backend.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import sys
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Dict, List, Literal, Optional, TypeAlias
 
@@ -34,18 +33,18 @@
 
 if _is_hip:
     try:
-        from aiter import (
+        from aiter import (  # noqa: F401
             flash_attn_varlen_func,
             mha_batch_prefill_func,
             paged_attention_ragged,
         )
-        from aiter.mla import mla_decode_fwd, mla_prefill_fwd
+        from aiter.mla import mla_decode_fwd, mla_prefill_fwd  # noqa: F401
     except ImportError:
         print(
             "aiter is AMD specific kernel library. Please make sure aiter is installed on your AMD device."
         )
 else:
-    from sgl_kernel.flash_attn import flash_attn_varlen_func, flash_attn_with_kvcache
+    from sgl_kernel.flash_attn import flash_attn_with_kvcache
 
 
 @dataclass(frozen=True)
diff --git a/python/sglang/srt/layers/layernorm.py b/python/sglang/srt/layers/layernorm.py
index a0b75780bd7..c60314ad913 100644
--- a/python/sglang/srt/layers/layernorm.py
+++ b/python/sglang/srt/layers/layernorm.py
@@ -372,4 +372,4 @@ def extra_repr(self):
     logger.info(
         "sgl-kernel layernorm implementation is not available on current platform. Fallback to other kernel libraries."
     )
-    from vllm.model_executor.layers.layernorm import GemmaRMSNorm, RMSNorm
+    from vllm.model_executor.layers.layernorm import GemmaRMSNorm, RMSNorm  # noqa: F401
diff --git a/python/sglang/srt/layers/moe/cutlass_moe.py b/python/sglang/srt/layers/moe/cutlass_moe.py
index d0fb4e3ef48..870749d4799 100755
--- a/python/sglang/srt/layers/moe/cutlass_moe.py
+++ b/python/sglang/srt/layers/moe/cutlass_moe.py
@@ -116,8 +116,6 @@ def cutlass_fused_experts_fp8(
 
 if is_cuda:
     from sglang.srt.layers.quantization.fp8_kernel import (
-        per_group_transpose,
-        per_token_group_quant_fp8_hopper_moe_mn_major,
         sglang_per_token_group_quant_fp8,
     )
diff --git a/python/sglang/srt/layers/moe/cutlass_w4a8_moe.py b/python/sglang/srt/layers/moe/cutlass_w4a8_moe.py
index 2a84dedc4bf..800c8c83a6b 100644
--- a/python/sglang/srt/layers/moe/cutlass_w4a8_moe.py
+++ b/python/sglang/srt/layers/moe/cutlass_w4a8_moe.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 """Cutlass W4A8 MoE kernel."""
-import logging
 from typing import Optional
 
 import torch
diff --git a/python/sglang/srt/layers/moe/ep_moe/kernels.py b/python/sglang/srt/layers/moe/ep_moe/kernels.py
index ef4262a1c1c..89bab802cf0 100644
--- a/python/sglang/srt/layers/moe/ep_moe/kernels.py
+++ b/python/sglang/srt/layers/moe/ep_moe/kernels.py
@@ -1,12 +1,9 @@
 import logging
-from typing import List, Optional
 
 import torch
 import triton
 
-from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
-from sglang.srt.utils import ceil_div, dispose_tensor, is_cuda
-from sglang.utils import is_in_ci
+from sglang.srt.utils import ceil_div, is_cuda
 
 logger = logging.getLogger(__name__)
diff --git a/python/sglang/srt/layers/moe/flashinfer_cutedsl_moe.py b/python/sglang/srt/layers/moe/flashinfer_cutedsl_moe.py
index 1d37236e020..8026b1e67c5 100644
--- a/python/sglang/srt/layers/moe/flashinfer_cutedsl_moe.py
+++ b/python/sglang/srt/layers/moe/flashinfer_cutedsl_moe.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional, Union
+from typing import Optional, Union
 
 import torch
 from flashinfer.cute_dsl.blockscaled_gemm import grouped_gemm_nt_masked
diff --git a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
index 1ff77818421..0eb2a917036 100644
--- a/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
+++ b/python/sglang/srt/layers/moe/fused_moe_triton/layer.py
@@ -43,13 +43,7 @@
 )
 
 if is_flashinfer_available():
-    from flashinfer import (
-        RoutingMethodType,
-        fp4_quantize,
-        reorder_rows_for_gated_act_gemm,
-        shuffle_matrix_a,
-        shuffle_matrix_sf_a,
-    )
+    from flashinfer import RoutingMethodType, fp4_quantize
 
 _is_hip = is_hip()
 _is_cpu_amx_available = cpu_has_amx_support()
diff --git a/python/sglang/srt/layers/moe/moe_runner/triton.py b/python/sglang/srt/layers/moe/moe_runner/triton.py
index 116fdcaa019..8c77d758043 100644
--- a/python/sglang/srt/layers/moe/moe_runner/triton.py
+++ b/python/sglang/srt/layers/moe/moe_runner/triton.py
@@ -51,7 +51,9 @@
 
 
 if _is_cuda or _is_hip:
-    from sgl_kernel import moe_align_block_size as sgl_moe_align_block_size
+    from sgl_kernel import (  # noqa: F401
+        moe_align_block_size as sgl_moe_align_block_size,
+    )
 
 
 @dataclass
diff --git a/python/sglang/srt/layers/moe/rocm_moe_utils.py b/python/sglang/srt/layers/moe/rocm_moe_utils.py
index 5fe2de1e584..efa6bb1bb23 100644
--- a/python/sglang/srt/layers/moe/rocm_moe_utils.py
+++ b/python/sglang/srt/layers/moe/rocm_moe_utils.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from enum import IntEnum
-from functools import cache
 from typing import Optional
 
 import torch
diff --git a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
index 618c4cf9eb1..8667d8747c5 100644
--- a/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
+++ b/python/sglang/srt/layers/moe/token_dispatcher/deepep.py
@@ -3,7 +3,7 @@
 import logging
 from contextlib import nullcontext
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Dict, List, NamedTuple, Optional, Tuple, Union
+from typing import TYPE_CHECKING, List, NamedTuple, Optional, Tuple, Union
 
 from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
 from sglang.srt.layers.moe.token_dispatcher.base import (
diff --git a/python/sglang/srt/layers/moe/token_dispatcher/mooncake.py b/python/sglang/srt/layers/moe/token_dispatcher/mooncake.py
index d6d56186563..54ba8f1b562 100644
--- a/python/sglang/srt/layers/moe/token_dispatcher/mooncake.py
+++ b/python/sglang/srt/layers/moe/token_dispatcher/mooncake.py
@@ -22,7 +22,7 @@
 except ImportError:
     use_mooncake_ep = False
 
-from enum import Enum, IntEnum, auto
+from enum import Enum, auto
 
 import torch
 import torch.distributed as dist
diff --git a/python/sglang/srt/layers/quantization/awq.py b/python/sglang/srt/layers/quantization/awq.py
index 9cba60c2b53..d796008c888 100644
--- a/python/sglang/srt/layers/quantization/awq.py
+++ b/python/sglang/srt/layers/quantization/awq.py
@@ -3,7 +3,7 @@
 
 import logging
 import warnings
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 import torch
 
diff --git a/python/sglang/srt/layers/quantization/base_config.py b/python/sglang/srt/layers/quantization/base_config.py
index 4a5b7905eee..18300517702 100644
--- a/python/sglang/srt/layers/quantization/base_config.py
+++ b/python/sglang/srt/layers/quantization/base_config.py
@@ -3,7 +3,6 @@
 
 import inspect
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type
 
 import torch
diff --git a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index e2ff25e6868..3517bc5e267 100644
--- a/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/python/sglang/srt/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -5,7 +5,7 @@
 import enum
 import logging
 from enum import Enum
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List
 
 import torch
 from compressed_tensors import CompressionFormat
@@ -21,14 +21,7 @@
     per_tensor_dequantize,
     replace_parameter,
 )
-from sglang.srt.utils import (
-    get_bool_env_var,
-    is_cpu,
-    is_cuda,
-    is_hip,
-    is_npu,
-    set_weight_attrs,
-)
+from sglang.srt.utils import get_bool_env_var, is_hip, set_weight_attrs
 
 if TYPE_CHECKING:
     from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
@@ -49,7 +42,7 @@
     from sglang.srt.layers.moe.rocm_moe_utils import rocm_fused_experts_tkw1
 
 try:
-    import vllm
+    import vllm  # noqa: F401
 
     VLLM_AVAILABLE = True
 except ImportError:
diff --git a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
index 6a7ae00d0d4..9bb34046d51 100644
--- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
+++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/configurer.py
@@ -12,7 +12,7 @@ def _compute_enable_deep_gemm():
         return False
 
     try:
-        import deep_gemm
+        import deep_gemm  # noqa: F401
     except ImportError:
         return False
 
diff --git a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py
index 02945f44961..1f2f4542a94 100644
--- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py
+++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/entrypoint.py
@@ -5,7 +5,7 @@
 import torch
 
 from sglang.srt.layers.quantization.deep_gemm_wrapper import compile_utils
-from sglang.srt.layers.quantization.deep_gemm_wrapper.configurer import (
+from sglang.srt.layers.quantization.deep_gemm_wrapper.configurer import (  # noqa: F401
     DEEPGEMM_BLACKWELL,
     DEEPGEMM_SCALE_UE8M0,
     ENABLE_JIT_DEEPGEMM,
@@ -17,7 +17,7 @@
 
 if ENABLE_JIT_DEEPGEMM:
     import deep_gemm
-    from deep_gemm.utils.layout import get_mn_major_tma_aligned_tensor
+    from deep_gemm.utils.layout import get_mn_major_tma_aligned_tensor  # noqa: F401
 
 _SANITY_CHECK = get_bool_env_var("SGLANG_DEEPGEMM_SANITY_CHECK")
diff --git a/python/sglang/srt/layers/quantization/fp8_kernel.py b/python/sglang/srt/layers/quantization/fp8_kernel.py
index 580f103f212..bd962891663 100644
--- a/python/sglang/srt/layers/quantization/fp8_kernel.py
+++ b/python/sglang/srt/layers/quantization/fp8_kernel.py
@@ -67,7 +67,7 @@
         raise ImportError("aiter is required when SGLANG_USE_AITER is set to True")
 else:
     try:
-        import vllm._C
+        import vllm._C  # noqa: F401
     except ImportError:
         raise ImportError("vllm is required when SGLANG_USE_AITER is set to False")
 
diff --git a/python/sglang/srt/layers/quantization/fpgemm_fp8.py b/python/sglang/srt/layers/quantization/fpgemm_fp8.py
index 5a78626ff3c..0c703010179 100644
--- a/python/sglang/srt/layers/quantization/fpgemm_fp8.py
+++ b/python/sglang/srt/layers/quantization/fpgemm_fp8.py
@@ -11,7 +11,6 @@
 from sglang.srt.layers.linear import LinearBase
 from sglang.srt.layers.parameter import ChannelQuantScaleParameter, ModelWeightParameter
 from sglang.srt.layers.quantization.base_config import (
-    FusedMoEMethodBase,
     LinearMethodBase,
     QuantizationConfig,
     QuantizeMethodBase,
@@ -28,7 +27,7 @@
     prepare_fp8_layer_for_marlin,
 )
 from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
-from sglang.srt.layers.quantization.utils import is_layer_skipped, replace_parameter
+from sglang.srt.layers.quantization.utils import is_layer_skipped
 from sglang.srt.utils import get_bool_env_var, is_cuda
 
 _is_cuda = is_cuda()
diff --git a/python/sglang/srt/layers/quantization/gptq.py b/python/sglang/srt/layers/quantization/gptq.py
index ccd3d46f705..be28f07f8c1 100644
--- a/python/sglang/srt/layers/quantization/gptq.py
+++ b/python/sglang/srt/layers/quantization/gptq.py
@@ -199,7 +199,6 @@ def get_quant_method(
         self, layer: torch.nn.Module, prefix: str
     ) -> Optional[LinearMethodBase]:
         # Delay the import to avoid circular dependency
-        from sglang.srt.layers.linear import LinearBase
         from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
 
         if isinstance(layer, FusedMoE):
diff --git a/python/sglang/srt/layers/quantization/int8_kernel.py b/python/sglang/srt/layers/quantization/int8_kernel.py
index 9e92412ac9d..91cba1c3278 100644
--- a/python/sglang/srt/layers/quantization/int8_kernel.py
+++ b/python/sglang/srt/layers/quantization/int8_kernel.py
@@ -8,7 +8,7 @@
 import triton
 import triton.language as tl
 
-from sglang.srt.utils import get_bool_env_var, get_device_name, is_cuda
+from sglang.srt.utils import get_device_name, is_cuda
 
 _is_cuda = is_cuda()
 if _is_cuda:
diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py
index d5c1db3a84c..f1c6dafb592 100755
--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -1059,16 +1059,7 @@ def prepare_static_weights_for_kernel(
         intermediate_size,
         num_experts,
     ):
-        from flashinfer import (
-            RoutingMethodType,
-            e2m1_and_ufp8sf_scale_to_float,
-            fp4_quantize,
-            next_positive_power_of_2,
-            nvfp4_block_scale_interleave,
-            reorder_rows_for_gated_act_gemm,
-            shuffle_matrix_a,
-            shuffle_matrix_sf_a,
-        )
+        from flashinfer import nvfp4_block_scale_interleave
         from flashinfer.fused_moe.core import (
             _maybe_get_cached_w2_permute_indices,
             _maybe_get_cached_w3_w1_permute_indices,
diff --git a/python/sglang/srt/layers/quantization/petit.py b/python/sglang/srt/layers/quantization/petit.py
index 2c608507c9c..daac52ee2e0 100644
--- a/python/sglang/srt/layers/quantization/petit.py
+++ b/python/sglang/srt/layers/quantization/petit.py
@@ -2,7 +2,7 @@
 
 import logging
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 import regex as re
 import torch
diff --git a/python/sglang/srt/layers/quantization/quark/quark_moe.py b/python/sglang/srt/layers/quantization/quark/quark_moe.py
index d1ad13f4810..3d2d52cd22e 100644
--- a/python/sglang/srt/layers/quantization/quark/quark_moe.py
+++ b/python/sglang/srt/layers/quantization/quark/quark_moe.py
@@ -3,16 +3,16 @@
 from __future__ import annotations
 
 import logging
-from typing import TYPE_CHECKING, Any, Callable, Optional
+from typing import TYPE_CHECKING, Any
 
 import torch
-from aiter import ActivationType, QuantType, biased_grouped_topk
+from aiter import ActivationType, QuantType
 from aiter.fused_moe import fused_moe
 from aiter.utility.fp4_utils import e8m0_shuffle
 
 from sglang.srt.layers.moe import MoeRunnerConfig
 from sglang.srt.layers.quantization.base_config import FusedMoEMethodBase
-from sglang.srt.utils import get_bool_env_var, is_hip, mxfp_supported, set_weight_attrs
+from sglang.srt.utils import is_hip, set_weight_attrs
 
 if TYPE_CHECKING:
     from sglang.srt.layers.moe.token_dispatcher import (
diff --git a/python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py b/python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
index a0787baaf0f..a8322b4963d 100644
--- a/python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
+++ b/python/sglang/srt/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
@@ -2,20 +2,13 @@
 
 from typing import Any, Callable, Optional
 
-import aiter
 import torch
-import torch.nn.functional as F
-from aiter.ops.gemm_op_a4w4 import gemm_a4w4
-from aiter.ops.shuffle import shuffle_weight
 from aiter.ops.triton.gemm_afp4wfp4 import gemm_afp4wfp4
 from aiter.ops.triton.gemm_afp4wfp4_pre_quant_atomic import gemm_afp4wfp4_pre_quant
 from aiter.ops.triton.quant import dynamic_mxfp4_quant
-from aiter.utility import dtypes
-from aiter.utility.fp4_utils import e8m0_shuffle
 
 from sglang.srt.layers.parameter import GroupQuantScaleParameter, PackedvLLMParameter
 from sglang.srt.layers.quantization.quark.schemes import QuarkScheme
-from sglang.srt.utils import get_bool_env_var
 
 __all__ = ["QuarkW4A4MXFP4"]
diff --git a/python/sglang/srt/layers/quantization/utils.py b/python/sglang/srt/layers/quantization/utils.py
index 63b8b6eb797..d407b95f277 100644
--- a/python/sglang/srt/layers/quantization/utils.py
+++ b/python/sglang/srt/layers/quantization/utils.py
@@ -11,7 +11,6 @@
 import torch
 
 from sglang.srt.layers.quantization.fp8_kernel import scaled_fp8_quant
-from sglang.srt.utils import is_cuda
 
 if TYPE_CHECKING:
     from sglang.srt.layers.quantization.base_config import QuantizationConfig
diff --git a/python/sglang/srt/layers/quantization/w4afp8.py b/python/sglang/srt/layers/quantization/w4afp8.py
index e97de07d799..7c5d4554a67 100644
--- a/python/sglang/srt/layers/quantization/w4afp8.py
+++ b/python/sglang/srt/layers/quantization/w4afp8.py
@@ -1,14 +1,13 @@
 from __future__ import annotations
 
 import logging
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Optional
 
 import torch
 from torch.nn import Module
 from torch.nn.parameter import Parameter
 
-from sglang.srt.distributed.parallel_state import get_moe_expert_parallel_world_size
-from sglang.srt.layers.linear import LinearBase, UnquantizedLinearMethod
+from sglang.srt.layers.linear import UnquantizedLinearMethod
 from sglang.srt.layers.quantization.base_config import (
     FusedMoEMethodBase,
     QuantizationConfig,
@@ -17,11 +16,11 @@
 from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod
 from sglang.srt.layers.quantization.unquant import UnquantizedLinearMethod
 from sglang.srt.layers.quantization.utils import is_layer_skipped
-from sglang.srt.utils import is_npu, set_weight_attrs
+from sglang.srt.utils import set_weight_attrs
 
 if TYPE_CHECKING:
     from sglang.srt.layers.moe import MoeRunnerConfig
-    from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE
+    from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE
     from sglang.srt.layers.moe.token_dispatcher import (
         CombineInput,
         DeepEPNormalOutput,
diff --git a/python/sglang/srt/layers/quantization/w8a8_int8.py b/python/sglang/srt/layers/quantization/w8a8_int8.py
index 17a79190df7..77be31163ec 100644
--- a/python/sglang/srt/layers/quantization/w8a8_int8.py
+++ b/python/sglang/srt/layers/quantization/w8a8_int8.py
@@ -1,28 +1,12 @@
 from __future__ import annotations
 
-import importlib
-import sys
 from types import MappingProxyType
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Dict,
-    List,
-    Mapping,
-    Optional,
-    Tuple,
-    Union,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union, cast
 
 import torch
 from torch.nn.parameter import Parameter
 
-from sglang.srt.distributed import (
-    get_tensor_model_parallel_rank,
-    get_tensor_model_parallel_world_size,
-)
+from sglang.srt.distributed import get_tensor_model_parallel_world_size
 from sglang.srt.layers.amx_utils import _amx_process_weight_after_loading
 from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
 from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
diff --git a/python/sglang/srt/layers/utils.py b/python/sglang/srt/layers/utils.py
index 45e15479128..e88f3a938ad 100644
--- a/python/sglang/srt/layers/utils.py
+++ b/python/sglang/srt/layers/utils.py
@@ -1,6 +1,5 @@
 import logging
 import re
-from functools import lru_cache
 
 import torch
 
diff --git a/python/sglang/srt/lora/backend/triton_backend.py b/python/sglang/srt/lora/backend/triton_backend.py
index f99e2c006c7..722915efc51 100644
--- a/python/sglang/srt/lora/backend/triton_backend.py
+++ b/python/sglang/srt/lora/backend/triton_backend.py
@@ -11,7 +11,6 @@
 )
 from sglang.srt.lora.utils import LoRABatchInfo
 from sglang.srt.model_executor.forward_batch_info import ForwardBatch
-from sglang.srt.server_args import ServerArgs
 
 
 class TritonLoRABackend(BaseLoRABackend):
diff --git a/python/sglang/srt/lora/eviction_policy.py b/python/sglang/srt/lora/eviction_policy.py
index 7d1f5f91adf..d4b29612f06 100644
--- a/python/sglang/srt/lora/eviction_policy.py
+++ b/python/sglang/srt/lora/eviction_policy.py
@@ -20,7 +20,7 @@
 import time
 from abc import ABC, abstractmethod
 from collections import OrderedDict
-from typing import Any, Dict, List, Optional, Set
+from typing import Optional, Set
 
 logger = logging.getLogger(__name__)
 
diff --git a/python/sglang/srt/lora/lora_manager.py b/python/sglang/srt/lora/lora_manager.py
index 30d3386e28d..19ff874dc1d 100644
--- a/python/sglang/srt/lora/lora_manager.py
+++ b/python/sglang/srt/lora/lora_manager.py
@@ -16,7 +16,7 @@
 # and "Punica: Multi-Tenant LoRA Serving"
 
 import logging
-from typing import Dict, Iterable, List, Optional, Set, Tuple
+from typing import Dict, Iterable, List, Optional
 
 import torch
 
diff --git a/python/sglang/srt/managers/cache_controller.py b/python/sglang/srt/managers/cache_controller.py
index f36d61ee09a..b5c4aa17234 100644
--- a/python/sglang/srt/managers/cache_controller.py
+++ b/python/sglang/srt/managers/cache_controller.py
@@ -14,11 +14,10 @@
 """
 
 import logging
-import math
 import threading
 import time
-from queue import Empty, Full, PriorityQueue, Queue
-from typing import TYPE_CHECKING, List, NamedTuple, Optional, Set, Tuple
+from queue import Empty, Full, Queue
+from typing import TYPE_CHECKING, List, NamedTuple, Optional
 
 import torch
 
@@ -41,7 +40,7 @@
     get_attention_tp_size,
     is_dp_attention_enabled,
 )
-from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool, MLATokenToKVPool
+from sglang.srt.mem_cache.memory_pool import MLATokenToKVPool
 
 logger = logging.getLogger(__name__)
diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index 264d89bb9d0..a39a7a53536 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -59,11 +59,10 @@
     SWATokenToKVPoolAllocator,
 )
 from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
-from sglang.srt.mem_cache.chunk_cache import ChunkCache, SWAChunkCache
+from sglang.srt.mem_cache.chunk_cache import SWAChunkCache
 from sglang.srt.mem_cache.common import (
     alloc_for_decode,
     alloc_for_extend,
-    alloc_token_slots,
     evict_from_tree_cache,
 )
 from sglang.srt.mem_cache.mamba_radix_cache import MambaRadixCache
@@ -76,7 +75,6 @@
 from sglang.srt.sampling.sampling_params import SamplingParams
 from sglang.srt.server_args import ServerArgs, get_global_server_args
 from sglang.srt.utils import flatten_nested_list
-from sglang.srt.utils.common import next_power_of_2
 
 if TYPE_CHECKING:
     from sglang.srt.configs.model_config import ModelConfig
diff --git a/python/sglang/srt/managers/scheduler_metrics_mixin.py b/python/sglang/srt/managers/scheduler_metrics_mixin.py
index 91fff9e9b36..34832f3e316 100644
--- a/python/sglang/srt/managers/scheduler_metrics_mixin.py
+++ b/python/sglang/srt/managers/scheduler_metrics_mixin.py
@@ -3,13 +3,10 @@
 import logging
 import time
 from collections import defaultdict
-from typing import TYPE_CHECKING, Dict, List, Optional, Union
-
-import torch
+from typing import TYPE_CHECKING, List, Optional
 
 from sglang.srt.disaggregation.kv_events import EventPublisherFactory, KVEventBatch
 from sglang.srt.disaggregation.utils import DisaggregationMode
-from sglang.srt.managers.io_struct import TokenizedGenerateReqInput
 from sglang.srt.managers.schedule_policy import PrefillAdder
 from sglang.srt.managers.scheduler import Req, ScheduleBatch
 from sglang.srt.metrics.collector import SchedulerMetricsCollector, SchedulerStats
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index 03c15fde952..3e325ca4d0b 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -16,7 +16,6 @@
 import asyncio
 import copy
 import dataclasses
-import json
 import logging
 import math
 import os
diff --git a/python/sglang/srt/managers/utils.py b/python/sglang/srt/managers/utils.py
index ccd3f0fe2d8..fa343519849 100644
--- a/python/sglang/srt/managers/utils.py
+++ b/python/sglang/srt/managers/utils.py
@@ -1,8 +1,7 @@
 from __future__ import annotations
 
 import logging
-import multiprocessing as mp
-from typing import TYPE_CHECKING, Dict, List, Optional
+from typing import TYPE_CHECKING, Optional
 
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
 from sglang.srt.managers.schedule_batch import Req
diff --git a/python/sglang/srt/mem_cache/allocator_ascend.py b/python/sglang/srt/mem_cache/allocator_ascend.py
index 2c606187a95..4adbf592a24 100644
--- a/python/sglang/srt/mem_cache/allocator_ascend.py
+++ b/python/sglang/srt/mem_cache/allocator_ascend.py
@@ -92,7 +92,7 @@ def alloc_extend(
         )
 
         if num_new_pages_item < 200:
-            import sgl_kernel_npu
+            import sgl_kernel_npu  # noqa: F401
 
             torch.ops.npu.alloc_extend(
                 prefix_lens,
diff --git a/python/sglang/srt/mem_cache/base_prefix_cache.py b/python/sglang/srt/mem_cache/base_prefix_cache.py
index 34df996893f..fb85497c329 100644
--- a/python/sglang/srt/mem_cache/base_prefix_cache.py
+++ b/python/sglang/srt/mem_cache/base_prefix_cache.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any, List, NamedTuple, Optional, Tuple
+from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Tuple
 
 import torch
 
diff --git a/python/sglang/srt/mem_cache/evict_policy.py b/python/sglang/srt/mem_cache/evict_policy.py
index ddd2ab6c31a..491d3d846be 100644
--- a/python/sglang/srt/mem_cache/evict_policy.py
+++ b/python/sglang/srt/mem_cache/evict_policy.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, List, Tuple, Union
+from typing import TYPE_CHECKING, Tuple, Union
 
 if TYPE_CHECKING:
     from sglang.srt.mem_cache.radix_cache import TreeNode
diff --git a/python/sglang/srt/mem_cache/mamba_radix_cache.py b/python/sglang/srt/mem_cache/mamba_radix_cache.py
index 7467daa5d56..739b204ed99 100644
--- a/python/sglang/srt/mem_cache/mamba_radix_cache.py
+++ b/python/sglang/srt/mem_cache/mamba_radix_cache.py
@@ -22,7 +22,6 @@
 import heapq
 import time
 from collections import defaultdict
-from functools import partial
 from typing import TYPE_CHECKING, List, Optional, Tuple
 
 import torch
@@ -33,7 +32,6 @@
 from sglang.srt.mem_cache.radix_cache import (
     RadixKey,
     _key_match_page_size1,
-    _key_match_paged,
     get_child_key,
 )
diff --git a/python/sglang/srt/mem_cache/memory_pool_host.py b/python/sglang/srt/mem_cache/memory_pool_host.py
index f6d655af095..edfae2cfe94 100644
--- a/python/sglang/srt/mem_cache/memory_pool_host.py
+++ b/python/sglang/srt/mem_cache/memory_pool_host.py
@@ -1,7 +1,6 @@
 import abc
 import logging
 import threading
-from enum import IntEnum
 from functools import wraps
 from typing import Optional
diff --git a/python/sglang/srt/mem_cache/multimodal_cache.py b/python/sglang/srt/mem_cache/multimodal_cache.py
index 63a1775430c..42c31a8e866 100644
--- a/python/sglang/srt/mem_cache/multimodal_cache.py
+++ b/python/sglang/srt/mem_cache/multimodal_cache.py
@@ -1,6 +1,5 @@
 import logging
 from collections import OrderedDict
-from typing import Dict
 
 import torch
 
diff --git a/python/sglang/srt/mem_cache/radix_cache.py b/python/sglang/srt/mem_cache/radix_cache.py
index f8259433092..9009d4e926b 100644
--- a/python/sglang/srt/mem_cache/radix_cache.py
+++ b/python/sglang/srt/mem_cache/radix_cache.py
@@ -23,7 +23,7 @@
 import time
 from collections import defaultdict
 from functools import lru_cache, partial
-from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple, Union
 
 import torch
 
diff --git a/python/sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py b/python/sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py
index 2e54e9816f9..14494d81980 100644
--- a/python/sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py
+++ b/python/sglang/srt/mem_cache/storage/aibrix_kvcache/unit_test.py
@@ -3,20 +3,8 @@
 
 import torch
 import torch.distributed
-from aibrix_kvcache import (
-    BaseKVCacheManager,
-    GroupAwareKVCacheManager,
-    KVCacheBlockLayout,
-    KVCacheBlockSpec,
-    KVCacheConfig,
-    KVCacheMetrics,
-    KVCacheTensorSpec,
-    ModelSpec,
-    TokenListView,
-)
-from aibrix_kvcache.common.absl_logging import getLogger, log_every_n_seconds, log_if
+from aibrix_kvcache.common.absl_logging import log_every_n_seconds
 from aibrix_kvcache_storage import AibrixKVCacheStorage
-from torch.distributed import Backend, ProcessGroup
 
 from sglang.srt.mem_cache.hicache_storage import HiCacheStorageConfig
 from sglang.srt.mem_cache.memory_pool import MHATokenToKVPool
diff --git a/python/sglang/srt/mem_cache/storage/eic/eic_storage.py b/python/sglang/srt/mem_cache/storage/eic/eic_storage.py
index 0acd5b65fd3..f3cc1563257 100644
--- a/python/sglang/srt/mem_cache/storage/eic/eic_storage.py
+++ b/python/sglang/srt/mem_cache/storage/eic/eic_storage.py
@@ -2,21 +2,18 @@
 import logging
 import os
 import time
-import uuid
-from dataclasses import dataclass
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, List, Optional, Tuple
 
 import eic
 import torch
 import yaml
 
-from sglang.srt.layers.dp_attention import get_attention_tp_rank, get_attention_tp_size
 from sglang.srt.mem_cache.hicache_storage import (
     HiCacheStorage,
     HiCacheStorageConfig,
     HiCacheStorageExtraInfo,
 )
-from sglang.srt.mem_cache.memory_pool_host import HostKVCache, MLATokenToKVPoolHost
+from sglang.srt.mem_cache.memory_pool_host import HostKVCache
 
 logger = logging.getLogger(__name__)
diff --git a/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py b/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py
index c7a485fa048..d789a205348 100644
--- a/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py
+++ b/python/sglang/srt/mem_cache/storage/hf3fs/hf3fs_client.py
@@ -1,6 +1,5 @@
 import logging
 import os
-import threading
 from abc import ABC, abstractmethod
 from typing import List
 
diff --git a/python/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py b/python/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py
index bf31cbb3894..9fdadf6ac2b 100644
--- a/python/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py
+++ b/python/sglang/srt/mem_cache/storage/lmcache/lmc_radix_cache.py
@@ -2,7 +2,7 @@
 
 import logging
 import threading
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Optional
 
 import torch
diff --git a/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py b/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py
index 55b3dd976a0..8965acb4aaa 100644
--- a/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py
+++ b/python/sglang/srt/mem_cache/storage/nixl/hicache_nixl.py
@@ -1,9 +1,8 @@
-import hashlib
 import logging
 import os
 import time
 import uuid
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Union
 
 import torch
diff --git a/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py b/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py
index 6e3d2a900cc..b04f9e58d84 100644
--- a/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py
+++ b/python/sglang/srt/mem_cache/storage/nixl/nixl_utils.py
@@ -1,6 +1,6 @@
 import logging
 import os
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, List, Optional, Tuple, Union
 
 import torch
 
diff --git a/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py b/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py
index 3784ab91ad1..aea004a6d72 100755
--- a/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py
+++ b/python/sglang/srt/mem_cache/storage/nixl/test_hicache_nixl_storage.py
@@ -2,7 +2,7 @@
 
 import os
 import unittest
-from typing import List, Optional
+from typing import List
 from unittest.mock import MagicMock
 
 import torch
diff --git a/python/sglang/srt/metrics/func_timer.py b/python/sglang/srt/metrics/func_timer.py
index fbb01bac806..51d445ab44e 100644
--- a/python/sglang/srt/metrics/func_timer.py
+++ b/python/sglang/srt/metrics/func_timer.py
@@ -18,7 +18,7 @@
 import asyncio
 import time
 from functools import wraps
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, Optional
 
 from sglang.srt.metrics.utils import exponential_buckets
 
diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py
index b1b8b7ff3fb..ef780899dd9 100644
--- a/python/sglang/srt/model_executor/model_runner.py
+++ b/python/sglang/srt/model_executor/model_runner.py
@@ -104,11 +104,7 @@
 )
 from sglang.srt.model_executor.cpu_graph_runner import CPUGraphRunner
 from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
-from sglang.srt.model_executor.forward_batch_info import (
-    ForwardBatch,
-    ForwardMode,
-    PPProxyTensors,
-)
+from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors
 from sglang.srt.model_executor.npu_graph_runner import NPUGraphRunner
 from sglang.srt.model_executor.piecewise_cuda_graph_runner import (
     PiecewiseCudaGraphRunner,
diff --git a/python/sglang/srt/model_executor/npu_graph_runner.py b/python/sglang/srt/model_executor/npu_graph_runner.py
index db7dcd15943..cfd9abbcf21 100644
--- a/python/sglang/srt/model_executor/npu_graph_runner.py
+++ b/python/sglang/srt/model_executor/npu_graph_runner.py
@@ -19,10 +19,9 @@
 import threading
 from typing import TYPE_CHECKING, Optional, Union
 
-import numpy as np
 import torch
 
-from sglang.srt.configs.model_config import AttentionArch, is_deepseek_nsa
+from sglang.srt.configs.model_config import is_deepseek_nsa
 from sglang.srt.model_executor.cuda_graph_runner import CudaGraphRunner
 
 logger = logging.getLogger(__name__)
diff --git a/python/sglang/srt/models/bailing_moe.py b/python/sglang/srt/models/bailing_moe.py
index 2cb7d596104..e768c0a53a9 100644
--- a/python/sglang/srt/models/bailing_moe.py
+++ b/python/sglang/srt/models/bailing_moe.py
@@ -19,7 +19,7 @@
 # limitations under the License.
 """SGLang BailingMoE model."""
 import logging
-from typing import Any, Dict, Iterable, Optional, Tuple, Union
+from typing import Iterable, Optional, Tuple, Union
 
 import torch
 import torch.nn.functional as F
@@ -59,7 +59,6 @@
 from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
 from sglang.srt.layers.moe.token_dispatcher import DeepEPDispatcher
 from sglang.srt.layers.moe.topk import TopK
-from sglang.srt.layers.moe.utils import DeepEPMode
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.layers.radix_attention import RadixAttention
 from sglang.srt.layers.rotary_embedding import get_rope
diff --git a/python/sglang/srt/models/bert.py b/python/sglang/srt/models/bert.py
index d7f3301c656..45494423fe8 100644
--- a/python/sglang/srt/models/bert.py
+++ b/python/sglang/srt/models/bert.py
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-from typing import Any, Dict, Iterable, Optional, Set, Tuple
+from typing import Iterable, Optional, Set, Tuple
 
 import torch
 from torch import nn
diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index fb9cd4f6c9f..f24923a73a5 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -183,9 +183,9 @@
         awq_dequantize_triton as awq_dequantize,
     )
 elif _is_npu:
-    import custom_ops
-    import sgl_kernel_npu
-    import torch_npu
+    import custom_ops  # noqa: F401
+    import sgl_kernel_npu  # noqa: F401
+    import torch_npu  # noqa: F401
 else:
     pass
diff --git a/python/sglang/srt/models/dots_ocr.py b/python/sglang/srt/models/dots_ocr.py
index ee48909ed18..d1f60feccb5 100644
--- a/python/sglang/srt/models/dots_ocr.py
+++ b/python/sglang/srt/models/dots_ocr.py
@@ -6,7 +6,6 @@
 
 import torch
 import torch.nn as nn
-from transformers.activations import ACT2FN
 
 from sglang.srt.configs import DotsOCRConfig
 from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -22,7 +21,6 @@
 from sglang.srt.models.dots_vlm_vit import DotsVisionTransformer
 from sglang.srt.models.qwen2 import Qwen2ForCausalLM
 from sglang.srt.utils import add_prefix
-from sglang.srt.utils.hf_transformers_utils import get_processor
 
 logger = logging.getLogger(__name__)
diff --git a/python/sglang/srt/models/dots_vlm.py b/python/sglang/srt/models/dots_vlm.py
index 95475058f5e..d626b1ef6ad 100644
--- a/python/sglang/srt/models/dots_vlm.py
+++ b/python/sglang/srt/models/dots_vlm.py
@@ -23,7 +23,6 @@
 from torch import nn
 
 from sglang.srt.configs.dots_vlm import DotsVLMConfig
-from sglang.srt.distributed import parallel_state
 from sglang.srt.layers.quantization.base_config import QuantizationConfig
 from sglang.srt.managers.mm_utils import (
     MultiModalityDataPaddingPatternMultimodalTokens,
diff --git a/python/sglang/srt/models/falcon_h1.py b/python/sglang/srt/models/falcon_h1.py
index c35613bcb2e..0fab9e410d0 100644
--- a/python/sglang/srt/models/falcon_h1.py
+++ b/python/sglang/srt/models/falcon_h1.py
@@ -1,4 +1,3 @@
-import enum
 import logging
 from typing import Any, Iterable, List, Optional, Set, Tuple
 
diff --git a/python/sglang/srt/models/gemma3n_mm.py b/python/sglang/srt/models/gemma3n_mm.py
index 3c52635dd9e..86f7fd516dc 100644
--- a/python/sglang/srt/models/gemma3n_mm.py
+++ b/python/sglang/srt/models/gemma3n_mm.py
@@ -14,8 +14,7 @@
 )
 from transformers.models.auto.modeling_auto import AutoModel
 
-from sglang.srt.layers.layernorm import RMSNorm
-from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear +from sglang.srt.layers.linear import RowParallelLinear from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.vocab_parallel_embedding import VocabParallelEmbedding diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py index 35ce0c40db5..2d4bf41f12d 100644 --- a/python/sglang/srt/models/glm4_moe.py +++ b/python/sglang/srt/models/glm4_moe.py @@ -44,10 +44,8 @@ ) from sglang.srt.layers.layernorm import RMSNorm from sglang.srt.layers.linear import ( - ColumnParallelLinear, MergedColumnParallelLinear, QKVParallelLinear, - ReplicatedLinear, RowParallelLinear, ) from sglang.srt.layers.logits_processor import LogitsProcessor @@ -78,16 +76,12 @@ BumpAllocator, LazyValue, add_prefix, - bind_or_assign, cpu_has_amx_support, get_bool_env_var, get_device_sm, - get_int_env_var, is_cpu, is_cuda, - is_flashinfer_available, is_hip, - is_non_idle_and_non_empty, log_info_on_rank0, use_intel_amx_backend, ) diff --git a/python/sglang/srt/models/gpt_oss.py b/python/sglang/srt/models/gpt_oss.py index 1f280f37ef9..6d80adf0fac 100644 --- a/python/sglang/srt/models/gpt_oss.py +++ b/python/sglang/srt/models/gpt_oss.py @@ -85,7 +85,7 @@ if _is_cuda: - from sgl_kernel import FusedSetKVBufferArg + from sgl_kernel import FusedSetKVBufferArg # noqa: F401 class GptOssConfig(PretrainedConfig): diff --git a/python/sglang/srt/models/hunyuan.py b/python/sglang/srt/models/hunyuan.py index c1ed2543c62..7c6fd9e48a7 100644 --- a/python/sglang/srt/models/hunyuan.py +++ b/python/sglang/srt/models/hunyuan.py @@ -12,18 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""Inference-only HunYuan model compatible with HuggingFace weights.""" -import logging import re -from dataclasses import dataclass -from enum import Enum, auto -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, Optional, Tuple import torch from torch import nn from transformers import PretrainedConfig from sglang.srt.distributed import ( - get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce, @@ -46,7 +42,6 @@ from sglang.srt.layers.rotary_embedding import get_rope from sglang.srt.layers.sampler import Sampler from sglang.srt.layers.vocab_parallel_embedding import ( - DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding, ) @@ -56,7 +51,7 @@ kv_cache_scales_loader, maybe_remap_kv_scale_name, ) -from sglang.srt.utils import add_prefix, is_hip +from sglang.srt.utils import is_hip expert_distribution_recorder = ExpertDistributionRecorder() diff --git a/python/sglang/srt/models/interns1.py b/python/sglang/srt/models/interns1.py index c7383ed2583..e896843ff02 100644 --- a/python/sglang/srt/models/interns1.py +++ b/python/sglang/srt/models/interns1.py @@ -5,7 +5,6 @@ from transformers import PretrainedConfig from sglang.srt.layers.attention import vision_utils -from sglang.srt.layers.moe.ep_moe.layer import get_moe_impl_class from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.managers.mm_utils import ( diff --git a/python/sglang/srt/models/llama_eagle3.py b/python/sglang/srt/models/llama_eagle3.py index 87ae7ade5d5..d0605d08de5 100644 --- a/python/sglang/srt/models/llama_eagle3.py +++ b/python/sglang/srt/models/llama_eagle3.py @@ -27,7 +27,7 @@ from sglang.srt.distributed import get_pp_group from sglang.srt.layers.layernorm import RMSNorm -from sglang.srt.layers.linear import QKVParallelLinear, RowParallelLinear +from sglang.srt.layers.linear import QKVParallelLinear from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.vocab_parallel_embedding import ( diff --git a/python/sglang/srt/models/longcat_flash.py b/python/sglang/srt/models/longcat_flash.py index edfadfa0a1b..ffca2bad09a 100644 --- a/python/sglang/srt/models/longcat_flash.py +++ b/python/sglang/srt/models/longcat_flash.py @@ -44,9 +44,7 @@ ) from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation -from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo from sglang.srt.layers.activation import SiluAndMul -from sglang.srt.layers.amx_utils import PackWeightMethod from sglang.srt.layers.communicator import LayerCommunicator, LayerScatterModes from sglang.srt.layers.dp_attention import ( get_attention_tp_rank, @@ -87,20 +85,15 @@ from sglang.srt.server_args import get_global_server_args from sglang.srt.utils import ( BumpAllocator, - LazyValue, add_prefix, bind_or_assign, cpu_has_amx_support, get_bool_env_var, get_device_sm, - get_int_env_var, is_cpu, is_cuda, - is_flashinfer_available, is_hip, - is_non_idle_and_non_empty, is_npu, - is_sm100_supported, ) _is_hip = is_hip() @@ -113,13 +106,7 @@ _device_sm = get_device_sm() if _is_cuda: - from sgl_kernel import ( - awq_dequantize, - bmm_fp8, - dsv3_fused_a_gemm, - dsv3_router_gemm, - merge_state_v2, - ) + from sgl_kernel 
import awq_dequantize elif _is_cpu and _is_cpu_amx_available: pass elif _is_hip: diff --git a/python/sglang/srt/models/longcat_flash_nextn.py b/python/sglang/srt/models/longcat_flash_nextn.py index 69bd1548d4e..a6092785acc 100644 --- a/python/sglang/srt/models/longcat_flash_nextn.py +++ b/python/sglang/srt/models/longcat_flash_nextn.py @@ -32,14 +32,10 @@ import concurrent.futures import logging -import os -from enum import IntEnum, auto -from typing import Any, Dict, Iterable, Optional, Tuple, Union +from typing import Iterable, Optional, Tuple import torch -import torch.nn.functional as F from torch import nn -from tqdm import tqdm from sglang.srt.configs import LongcatFlashConfig from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder @@ -75,7 +71,6 @@ from sglang.srt.models.longcat_flash import LongcatFlashForCausalLM, LongcatFlashMLP from sglang.srt.utils import ( BumpAllocator, - LazyValue, add_prefix, bind_or_assign, cpu_has_amx_support, @@ -97,13 +92,7 @@ _device_sm = get_device_sm() if _is_cuda: - from sgl_kernel import ( - awq_dequantize, - bmm_fp8, - dsv3_fused_a_gemm, - dsv3_router_gemm, - merge_state_v2, - ) + from sgl_kernel import awq_dequantize elif _is_cpu and _is_cpu_amx_available: pass elif _is_hip: diff --git a/python/sglang/srt/models/mimo.py b/python/sglang/srt/models/mimo.py index 2a89e7706e3..15aad8f41c5 100644 --- a/python/sglang/srt/models/mimo.py +++ b/python/sglang/srt/models/mimo.py @@ -1,28 +1,17 @@ # Adapted from qwen2.py -from functools import partial -from typing import Any, Dict, Iterable, Optional, Tuple +from typing import Iterable, Optional, Tuple import torch from torch import nn -from sglang.srt.distributed import ( - get_tensor_model_parallel_rank, - get_tensor_model_parallel_world_size, - split_tensor_along_last_dim, - tensor_model_parallel_all_gather, -) -from sglang.srt.layers.layernorm import RMSNorm -from sglang.srt.layers.linear import QKVParallelLinear, RowParallelLinear from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.pooler import Pooler, PoolingType from sglang.srt.layers.quantization.base_config import QuantizationConfig -from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.rotary_embedding import get_rope from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead from sglang.srt.model_executor.forward_batch_info import ForwardBatch from sglang.srt.model_loader.weight_utils import default_weight_loader -from sglang.srt.models.qwen2 import Qwen2DecoderLayer, Qwen2MLP, Qwen2Model +from sglang.srt.models.qwen2 import Qwen2DecoderLayer, Qwen2Model from sglang.srt.utils import add_prefix MiMoConfig = None diff --git a/python/sglang/srt/models/mimo_mtp.py b/python/sglang/srt/models/mimo_mtp.py index 89e8c02cd62..2702a637d46 100644 --- a/python/sglang/srt/models/mimo_mtp.py +++ b/python/sglang/srt/models/mimo_mtp.py @@ -1,7 +1,6 @@ # Adapted from https://github.com/vllm-project/vllm/pull/17433/files and deepseek_nextn.py -from functools import partial -from typing import Any, Dict, Iterable, Optional, Tuple +from typing import Iterable, Optional, Tuple import torch from torch import nn diff --git a/python/sglang/srt/models/minicpmo.py b/python/sglang/srt/models/minicpmo.py index 2f8271c6cbd..b83a86e221e 100644 --- a/python/sglang/srt/models/minicpmo.py +++ b/python/sglang/srt/models/minicpmo.py @@ -43,7 +43,6 @@ general_mm_embed_routine, ) from sglang.srt.managers.schedule_batch import ( - Modality, MultimodalDataItem, MultimodalInputs, 
flatten_nested_list, @@ -59,8 +58,6 @@ try: from transformers import LogitsWarper from vector_quantize_pytorch import GroupedResidualFSQ - from vocos import Vocos - from vocos.pretrained import instantiate_class _tts_deps = True except: diff --git a/python/sglang/srt/models/mixtral.py b/python/sglang/srt/models/mixtral.py index 81026f9bb83..cb55848cfc7 100644 --- a/python/sglang/srt/models/mixtral.py +++ b/python/sglang/srt/models/mixtral.py @@ -24,7 +24,6 @@ from transformers import MixtralConfig from sglang.srt.distributed import ( - get_moe_expert_parallel_world_size, get_pp_group, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce, diff --git a/python/sglang/srt/models/opt.py b/python/sglang/srt/models/opt.py index a571e8937be..bf989f6e89e 100644 --- a/python/sglang/srt/models/opt.py +++ b/python/sglang/srt/models/opt.py @@ -17,7 +17,6 @@ from typing import Optional, Union import torch -import torch.nn.functional as F from torch import nn from transformers import OPTConfig @@ -26,10 +25,8 @@ get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, ) -from sglang.srt.layers.activation import get_act_fn from sglang.srt.layers.linear import ( ColumnParallelLinear, - MergedColumnParallelLinear, QKVParallelLinear, ReplicatedLinear, RowParallelLinear, @@ -38,7 +35,7 @@ from sglang.srt.layers.pooler import Pooler, PoolingType from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.radix_attention import RadixAttention -from sglang.srt.layers.utils import PPMissingLayer, get_layer_id +from sglang.srt.layers.utils import get_layer_id from sglang.srt.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding, @@ -47,7 +44,6 @@ from sglang.srt.model_loader.weight_utils import ( default_weight_loader, kv_cache_scales_loader, - maybe_remap_kv_scale_name, ) from sglang.srt.utils import add_prefix, make_layers diff --git a/python/sglang/srt/models/phi.py b/python/sglang/srt/models/phi.py index f48895c67f3..5679bc98781 100644 --- a/python/sglang/srt/models/phi.py +++ b/python/sglang/srt/models/phi.py @@ -1,5 +1,5 @@ # Adapted from https://github.com/vllm-project/vllm/blob/main/vllm/model_executor/models/phi.py -from typing import Iterable, Optional, Union +from typing import Iterable, Optional import torch from torch import nn diff --git a/python/sglang/srt/models/phi4mm.py b/python/sglang/srt/models/phi4mm.py index 37a638acb5c..6d00144d2db 100644 --- a/python/sglang/srt/models/phi4mm.py +++ b/python/sglang/srt/models/phi4mm.py @@ -24,7 +24,7 @@ import numpy as np import torch from torch import nn -from transformers import PretrainedConfig, SiglipVisionConfig +from transformers import PretrainedConfig from sglang.srt.layers.quantization import QuantizationConfig from sglang.srt.managers.mm_utils import ( diff --git a/python/sglang/srt/models/phimoe.py b/python/sglang/srt/models/phimoe.py index 4604aeef989..0d147c2b178 100644 --- a/python/sglang/srt/models/phimoe.py +++ b/python/sglang/srt/models/phimoe.py @@ -18,7 +18,6 @@ from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.layers.rotary_embedding import get_rope -from sglang.srt.layers.utils import PPMissingLayer from sglang.srt.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, diff --git a/python/sglang/srt/models/pixtral.py b/python/sglang/srt/models/pixtral.py index 04a7362d8cb..209b40645a6 100644 --- 
a/python/sglang/srt/models/pixtral.py +++ b/python/sglang/srt/models/pixtral.py @@ -16,13 +16,10 @@ Using mistral-community/pixtral-12b as reference. """ -import logging -import math from typing import Iterable, List, Optional, Set, Tuple, Union import torch import torch.nn as nn -import torch.nn.functional as F from transformers import PixtralVisionConfig, PretrainedConfig from transformers.models.pixtral.modeling_pixtral import PixtralRotaryEmbedding from transformers.models.pixtral.modeling_pixtral import ( diff --git a/python/sglang/srt/models/qwen.py b/python/sglang/srt/models/qwen.py index 009650411e3..206908b4900 100644 --- a/python/sglang/srt/models/qwen.py +++ b/python/sglang/srt/models/qwen.py @@ -15,7 +15,6 @@ # Adapted from # https://github.com/vllm-project/vllm/blob/c7f2cf2b7f67bce5842fedfdba508440fe257375/vllm/model_executor/models/qwen.py#L1 -import time from typing import Any, Dict, Iterable, Optional, Tuple import torch diff --git a/python/sglang/srt/models/qwen2_audio.py b/python/sglang/srt/models/qwen2_audio.py index 8609758a958..98f30636aba 100644 --- a/python/sglang/srt/models/qwen2_audio.py +++ b/python/sglang/srt/models/qwen2_audio.py @@ -23,30 +23,18 @@ # limitations under the License. """Inference-only Qwen2-Audio model compatible with HuggingFace weights.""" import logging -import math -from functools import lru_cache, partial -from typing import Any, Iterable, List, Optional, Tuple, Type, TypedDict +from typing import Any, Iterable, List, Optional, Tuple import torch import torch.nn as nn -import torch.nn.functional as F -from einops import rearrange -from transformers import AutoTokenizer, Qwen2AudioEncoderConfig, Qwen2Config -from transformers.activations import ACT2FN +from transformers import Qwen2AudioEncoderConfig, Qwen2Config from transformers.models.qwen2_audio.configuration_qwen2_audio import Qwen2AudioConfig from transformers.models.qwen2_audio.modeling_qwen2_audio import ( Qwen2AudioEncoder, Qwen2AudioMultiModalProjector, ) -from sglang.srt.layers.activation import QuickGELU -from sglang.srt.layers.attention.vision import VisionAttention -from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear -from sglang.srt.layers.logits_processor import LogitsProcessor -from sglang.srt.layers.pooler import Pooler, PoolingType from sglang.srt.layers.quantization.base_config import QuantizationConfig -from sglang.srt.layers.utils import get_layer_id -from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead from sglang.srt.managers.mm_utils import ( MultiModalityDataPaddingPatternMultimodalTokens, general_mm_embed_routine, @@ -60,7 +48,6 @@ from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.qwen2 import Qwen2ForCausalLM from sglang.srt.utils import add_prefix -from sglang.srt.utils.hf_transformers_utils import get_processor logger = logging.getLogger(__name__) diff --git a/python/sglang/srt/models/qwen2_vl.py b/python/sglang/srt/models/qwen2_vl.py index 7a42829e834..73a212f5b31 100644 --- a/python/sglang/srt/models/qwen2_vl.py +++ b/python/sglang/srt/models/qwen2_vl.py @@ -28,7 +28,6 @@ import torch import torch.nn as nn -import torch.nn.functional as F from einops import rearrange from transformers import Qwen2VLConfig from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig diff --git a/python/sglang/srt/models/qwen3_next.py b/python/sglang/srt/models/qwen3_next.py index 1b11aa30bf3..9fe9e774848 100644 --- a/python/sglang/srt/models/qwen3_next.py +++ 
b/python/sglang/srt/models/qwen3_next.py @@ -1,18 +1,12 @@ import enum import logging -from typing import Any, Dict, Iterable, Optional, Set, Tuple +from typing import Any, Iterable, Optional, Set, Tuple import torch -import torch.nn.functional as F from torch import nn from sglang.srt.configs.qwen3_next import Qwen3NextConfig -from sglang.srt.distributed import ( - divide, - get_pp_group, - get_tensor_model_parallel_rank, - get_tensor_model_parallel_world_size, -) +from sglang.srt.distributed import divide, get_pp_group from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation from sglang.srt.layers.attention.fla.layernorm_gated import RMSNorm as RMSNormGated @@ -23,10 +17,9 @@ get_attention_tp_size, is_dp_attention_enabled, ) -from sglang.srt.layers.layernorm import GemmaRMSNorm, RMSNorm +from sglang.srt.layers.layernorm import GemmaRMSNorm from sglang.srt.layers.linear import ( ColumnParallelLinear, - MergedColumnParallelLinear, QKVParallelLinear, RowParallelLinear, ) diff --git a/python/sglang/srt/models/qwen3_vl.py b/python/sglang/srt/models/qwen3_vl.py index c41eb040316..be81eef62b6 100644 --- a/python/sglang/srt/models/qwen3_vl.py +++ b/python/sglang/srt/models/qwen3_vl.py @@ -20,18 +20,13 @@ import numpy as np import torch import torch.nn as nn -import torch.nn.functional as F from einops import rearrange from transformers.activations import ACT2FN from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import ( Qwen2_5_VisionRotaryEmbedding, ) -from sglang.srt.configs.qwen3_vl import ( - Qwen3VLConfig, - Qwen3VLTextConfig, - Qwen3VLVisionConfig, -) +from sglang.srt.configs.qwen3_vl import Qwen3VLConfig, Qwen3VLVisionConfig from sglang.srt.layers.attention.vision import VisionAttention from sglang.srt.layers.linear import ColumnParallelLinear, RowParallelLinear from sglang.srt.layers.logits_processor import LogitsProcessor @@ -47,11 +42,7 @@ MultimodalDataItem, MultimodalInputs, ) -from sglang.srt.model_executor.forward_batch_info import ( - ForwardBatch, - ForwardMode, - PPProxyTensors, -) +from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.qwen3 import Qwen3Model from sglang.srt.utils import add_prefix diff --git a/python/sglang/srt/models/qwen3_vl_moe.py b/python/sglang/srt/models/qwen3_vl_moe.py index c4d56a25701..3bf0b11239f 100644 --- a/python/sglang/srt/models/qwen3_vl_moe.py +++ b/python/sglang/srt/models/qwen3_vl_moe.py @@ -25,12 +25,8 @@ get_moe_expert_parallel_world_size, get_tensor_model_parallel_rank, ) -from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE from sglang.srt.layers.quantization.base_config import QuantizationConfig -from sglang.srt.layers.vocab_parallel_embedding import ParallelLMHead -from sglang.srt.managers.mm_utils import general_mm_embed_routine -from sglang.srt.managers.schedule_batch import MultimodalDataItem from sglang.srt.model_executor.forward_batch_info import ForwardBatch, PPProxyTensors from sglang.srt.model_loader.weight_utils import default_weight_loader from sglang.srt.models.qwen3_moe import Qwen3MoeModel diff --git a/python/sglang/srt/models/roberta.py b/python/sglang/srt/models/roberta.py index 209be1296b5..9fad5cfa3cc 100644 --- a/python/sglang/srt/models/roberta.py +++ b/python/sglang/srt/models/roberta.py @@ -1,6 +1,5 @@ # 
SPDX-License-Identifier: Apache-2.0 -import itertools from typing import Iterable, Optional, Tuple import torch diff --git a/python/sglang/srt/models/sarashina2_vision.py b/python/sglang/srt/models/sarashina2_vision.py index eae34134923..f58908b5d15 100644 --- a/python/sglang/srt/models/sarashina2_vision.py +++ b/python/sglang/srt/models/sarashina2_vision.py @@ -17,7 +17,6 @@ from typing import Iterable, List, Optional, Tuple import torch -import torch.nn.functional as F from torch import nn from transformers import LlamaConfig diff --git a/python/sglang/srt/models/step3_vl.py b/python/sglang/srt/models/step3_vl.py index 14d277f9f38..5a9e74ab622 100644 --- a/python/sglang/srt/models/step3_vl.py +++ b/python/sglang/srt/models/step3_vl.py @@ -1,8 +1,7 @@ import logging import math -from collections.abc import Iterable from math import sqrt -from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, TypedDict, Union +from typing import Any, Dict, Iterable, List, Optional, Tuple import torch from torch import nn diff --git a/python/sglang/srt/multimodal/processors/deepseek_vl_v2.py b/python/sglang/srt/multimodal/processors/deepseek_vl_v2.py index b09402d0be1..26708e8dc01 100644 --- a/python/sglang/srt/multimodal/processors/deepseek_vl_v2.py +++ b/python/sglang/srt/multimodal/processors/deepseek_vl_v2.py @@ -18,9 +18,6 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. from typing import List, Union -import torch - -from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem from sglang.srt.models.deepseek_vl2 import DeepseekVL2ForCausalLM from sglang.srt.multimodal.processors.base_processor import ( BaseMultimodalProcessor, diff --git a/python/sglang/srt/multimodal/processors/dots_vlm.py b/python/sglang/srt/multimodal/processors/dots_vlm.py index 3b95beff3a8..5f095d150f5 100644 --- a/python/sglang/srt/multimodal/processors/dots_vlm.py +++ b/python/sglang/srt/multimodal/processors/dots_vlm.py @@ -1,5 +1,4 @@ import asyncio -import math import re from typing import Dict, List, Union diff --git a/python/sglang/srt/multimodal/processors/glm4v.py b/python/sglang/srt/multimodal/processors/glm4v.py index e3c8edc9283..2051a426fa0 100644 --- a/python/sglang/srt/multimodal/processors/glm4v.py +++ b/python/sglang/srt/multimodal/processors/glm4v.py @@ -1,4 +1,3 @@ -import re from typing import List, Union from decord import VideoReader @@ -9,10 +8,7 @@ from sglang.srt.multimodal.processors.base_processor import ( BaseMultimodalProcessor as SGLangBaseProcessor, ) -from sglang.srt.multimodal.processors.base_processor import ( - BaseMultiModalProcessorOutput, - MultimodalSpecialTokens, -) +from sglang.srt.multimodal.processors.base_processor import MultimodalSpecialTokens class Glm4vImageProcessor(SGLangBaseProcessor): diff --git a/python/sglang/srt/multimodal/processors/internvl.py b/python/sglang/srt/multimodal/processors/internvl.py index c9a2d97ef28..a1ef6b67554 100644 --- a/python/sglang/srt/multimodal/processors/internvl.py +++ b/python/sglang/srt/multimodal/processors/internvl.py @@ -4,10 +4,8 @@ import numpy as np import torch -import torchvision.transforms as T from decord import VideoReader, cpu, gpu from PIL import Image -from torchvision.transforms import InterpolationMode from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem from sglang.srt.models.interns1 import InternS1ForConditionalGeneration diff --git a/python/sglang/srt/multimodal/processors/janus_pro.py b/python/sglang/srt/multimodal/processors/janus_pro.py 
index 54d6c197884..044e31dd29a 100644 --- a/python/sglang/srt/multimodal/processors/janus_pro.py +++ b/python/sglang/srt/multimodal/processors/janus_pro.py @@ -1,6 +1,5 @@ from typing import List, Union -from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem from sglang.srt.models.deepseek_janus_pro import MultiModalityCausalLM from sglang.srt.multimodal.processors.base_processor import ( BaseMultimodalProcessor, diff --git a/python/sglang/srt/multimodal/processors/mllama4.py b/python/sglang/srt/multimodal/processors/mllama4.py index 6a01f2aebff..4f04688b8ec 100644 --- a/python/sglang/srt/multimodal/processors/mllama4.py +++ b/python/sglang/srt/multimodal/processors/mllama4.py @@ -1,13 +1,5 @@ from typing import List, Union -import torch -from transformers.image_utils import SizeDict -from transformers.models.llama4.image_processing_llama4_fast import ( - find_supported_resolutions, - get_best_fit, -) - -from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem from sglang.srt.models.mllama4 import Llama4ForConditionalGeneration from sglang.srt.multimodal.processors.base_processor import ( BaseMultimodalProcessor, diff --git a/python/sglang/srt/multimodal/processors/phi4mm.py b/python/sglang/srt/multimodal/processors/phi4mm.py index 1487d2ca2f7..c59a41685a2 100644 --- a/python/sglang/srt/multimodal/processors/phi4mm.py +++ b/python/sglang/srt/multimodal/processors/phi4mm.py @@ -3,7 +3,6 @@ from transformers.processing_utils import ProcessorMixin -from sglang.srt.managers.schedule_batch import Modality, MultimodalDataItem from sglang.srt.models.phi4mm import Phi4MMForCausalLM from sglang.srt.multimodal.processors.base_processor import ( BaseMultimodalProcessor, diff --git a/python/sglang/srt/multimodal/processors/step3_vl.py b/python/sglang/srt/multimodal/processors/step3_vl.py index ee537e68e7a..6bd691ecf3f 100644 --- a/python/sglang/srt/multimodal/processors/step3_vl.py +++ b/python/sglang/srt/multimodal/processors/step3_vl.py @@ -1,7 +1,7 @@ import math import re from itertools import product -from typing import List, Literal, Optional, TypedDict, Union +from typing import List, Optional, Union import numpy as np import torch diff --git a/python/sglang/srt/parser/reasoning_parser.py b/python/sglang/srt/parser/reasoning_parser.py index f50368aed9c..0c01ede9cba 100644 --- a/python/sglang/srt/parser/reasoning_parser.py +++ b/python/sglang/srt/parser/reasoning_parser.py @@ -1,4 +1,3 @@ -import re from typing import Dict, Optional, Tuple, Type from sglang.srt.parser.harmony_parser import HarmonyParser diff --git a/python/sglang/srt/server_args_config_parser.py b/python/sglang/srt/server_args_config_parser.py index 74dc676778a..2fee7fc0ce8 100644 --- a/python/sglang/srt/server_args_config_parser.py +++ b/python/sglang/srt/server_args_config_parser.py @@ -5,7 +5,7 @@ import logging from pathlib import Path -from typing import Any, Dict, List, Union +from typing import Any, Dict, List import yaml diff --git a/python/sglang/srt/speculative/eagle_worker.py b/python/sglang/srt/speculative/eagle_worker.py index e141a02386a..cb59b31f820 100644 --- a/python/sglang/srt/speculative/eagle_worker.py +++ b/python/sglang/srt/speculative/eagle_worker.py @@ -56,7 +56,7 @@ ) if is_cuda(): - from sgl_kernel import segment_packbits + from sgl_kernel import segment_packbits # noqa: F401 logger = logging.getLogger(__name__) SGLANG_RETURN_ORIGINAL_LOGPROB = get_bool_env_var("SGLANG_RETURN_ORIGINAL_LOGPROB") diff --git a/python/sglang/srt/speculative/spec_utils.py 
b/python/sglang/srt/speculative/spec_utils.py index d89236dbe83..c00391bcb56 100644 --- a/python/sglang/srt/speculative/spec_utils.py +++ b/python/sglang/srt/speculative/spec_utils.py @@ -22,8 +22,6 @@ from sglang.srt.utils import is_cuda, is_hip if TYPE_CHECKING: - from sglang.srt.mem_cache.allocator import TokenToKVPoolAllocator - from sglang.srt.mem_cache.memory_pool import ReqToTokenPool from sglang.srt.speculative.eagle_info import EagleVerifyInput diff --git a/python/sglang/srt/speculative/standalone_worker.py b/python/sglang/srt/speculative/standalone_worker.py index 23f9b9dd2c9..302799cc613 100644 --- a/python/sglang/srt/speculative/standalone_worker.py +++ b/python/sglang/srt/speculative/standalone_worker.py @@ -11,7 +11,7 @@ from sglang.srt.utils import empty_context, get_bool_env_var, is_cuda if is_cuda(): - from sgl_kernel import segment_packbits + from sgl_kernel import segment_packbits # noqa: F401 logger = logging.getLogger(__name__) SGLANG_RETURN_ORIGINAL_LOGPROB = get_bool_env_var("SGLANG_RETURN_ORIGINAL_LOGPROB") diff --git a/python/sglang/srt/utils/common.py b/python/sglang/srt/utils/common.py index 51ee7d10ee1..e2e6798c9f1 100644 --- a/python/sglang/srt/utils/common.py +++ b/python/sglang/srt/utils/common.py @@ -228,7 +228,7 @@ def support_triton(backend: str) -> bool: try: - import sgl_kernel + import sgl_kernel # noqa: F401 is_intel_amx_backend_available = hasattr( torch.ops.sgl_kernel, "convert_weight_packed" @@ -1556,7 +1556,7 @@ def get_hpu_memory_capacity(): def get_npu_memory_capacity(): try: - import torch_npu + import torch_npu # noqa: F401 return torch.npu.mem_get_info()[1] // 1024 // 1024 # unit: MB except ImportError as e: @@ -1743,7 +1743,7 @@ def get_device(device_id: Optional[int] = None) -> str: if is_habana_available(): try: - import habana_frameworks.torch.hpu + import habana_frameworks.torch.hpu # noqa: F401 if torch.hpu.is_available(): if device_id == None: @@ -1773,7 +1773,7 @@ def get_device_count() -> int: if is_habana_available(): try: - import habana_frameworks.torch.hpu + import habana_frameworks.torch.hpu # noqa: F401 if torch.hpu.is_available(): return torch.hpu.device_count() diff --git a/python/sglang/srt/utils/host_shared_memory.py b/python/sglang/srt/utils/host_shared_memory.py index c599527f9b8..20ddf8fc7ef 100644 --- a/python/sglang/srt/utils/host_shared_memory.py +++ b/python/sglang/srt/utils/host_shared_memory.py @@ -1,5 +1,4 @@ import logging -import os from dataclasses import dataclass from multiprocessing import shared_memory from pathlib import Path diff --git a/python/sglang/test/attention/test_flashattn_mla_backend.py b/python/sglang/test/attention/test_flashattn_mla_backend.py index ebfd0b39544..16f94a2b234 100644 --- a/python/sglang/test/attention/test_flashattn_mla_backend.py +++ b/python/sglang/test/attention/test_flashattn_mla_backend.py @@ -4,7 +4,6 @@ from sglang.srt.configs.model_config import AttentionArch from sglang.srt.layers.attention.flashattention_backend import FlashAttentionBackend -from sglang.srt.layers.attention.torch_native_backend import TorchNativeAttnBackend from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.mem_cache.memory_pool import MLATokenToKVPool from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode diff --git a/python/sglang/test/attention/test_prefix_chunk_info.py b/python/sglang/test/attention/test_prefix_chunk_info.py index c02d4d1d68f..2b85b695b8c 100644 --- a/python/sglang/test/attention/test_prefix_chunk_info.py +++ 
b/python/sglang/test/attention/test_prefix_chunk_info.py @@ -2,8 +2,6 @@ import torch -from sglang.srt.layers.attention.flashattention_backend import FlashAttentionBackend -from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.mem_cache.memory_pool import MLATokenToKVPool from sglang.srt.model_executor.forward_batch_info import ForwardBatch, ForwardMode from sglang.test.test_utils import CustomTestCase diff --git a/python/sglang/test/few_shot_gsm8k_engine.py b/python/sglang/test/few_shot_gsm8k_engine.py index 05b095713d0..567816cfcf7 100644 --- a/python/sglang/test/few_shot_gsm8k_engine.py +++ b/python/sglang/test/few_shot_gsm8k_engine.py @@ -1,16 +1,13 @@ import argparse import ast import asyncio -import json import re import time import numpy as np import sglang as sgl -from sglang.lang.api import set_default_backend -from sglang.lang.backend.runtime_endpoint import RuntimeEndpoint -from sglang.utils import download_and_cache_file, dump_state_text, read_jsonl +from sglang.utils import download_and_cache_file, read_jsonl INVALID = -9999999 diff --git a/python/sglang/test/simple_eval_gpqa.py b/python/sglang/test/simple_eval_gpqa.py index b77ca773e32..b39366ef5df 100644 --- a/python/sglang/test/simple_eval_gpqa.py +++ b/python/sglang/test/simple_eval_gpqa.py @@ -18,7 +18,6 @@ HTML_JINJA, Eval, EvalResult, - MessageList, SamplerBase, SingleEvalResult, format_multichoice_question, diff --git a/python/sglang/test/simple_eval_humaneval.py b/python/sglang/test/simple_eval_humaneval.py index 25dcdd53af6..efd03af3825 100644 --- a/python/sglang/test/simple_eval_humaneval.py +++ b/python/sglang/test/simple_eval_humaneval.py @@ -11,8 +11,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Dict, List, Optional -import tqdm - try: from human_eval.data import read_problems from human_eval.evaluation import estimate_pass_at_k @@ -41,7 +39,6 @@ def evaluate_functional_correctness( Evaluates the functional correctness of generated samples, and writes results to f"{sample_file}_results.jsonl.gz" """ - import copy # Check the generated samples against test suites. 
with ThreadPoolExecutor(max_workers=n_workers) as executor: diff --git a/python/sglang/test/test_block_fp8.py b/python/sglang/test/test_block_fp8.py index 80202d15e07..2390489cad4 100644 --- a/python/sglang/test/test_block_fp8.py +++ b/python/sglang/test/test_block_fp8.py @@ -1,5 +1,4 @@ import itertools -import os import unittest import torch @@ -577,7 +576,7 @@ def setUpClass(cls): if not torch.cuda.is_available(): raise unittest.SkipTest("CUDA is not available") try: - import deep_gemm + import deep_gemm # noqa: F401 except ImportError: raise unittest.SkipTest("DeepGEMM is not available") torch.set_default_device("cuda") diff --git a/python/sglang/test/test_block_fp8_deep_gemm_blackwell.py b/python/sglang/test/test_block_fp8_deep_gemm_blackwell.py index 36d7acddbcd..ac7239ea0f3 100644 --- a/python/sglang/test/test_block_fp8_deep_gemm_blackwell.py +++ b/python/sglang/test/test_block_fp8_deep_gemm_blackwell.py @@ -1,5 +1,4 @@ import itertools -import os import unittest from typing import List, Tuple diff --git a/python/sglang/test/test_cutlass_moe.py b/python/sglang/test/test_cutlass_moe.py index 377534a495d..fdab5a3acb0 100755 --- a/python/sglang/test/test_cutlass_moe.py +++ b/python/sglang/test/test_cutlass_moe.py @@ -1,5 +1,4 @@ import argparse -import time import torch import triton # Added import diff --git a/python/sglang/test/test_cutlass_w4a8_moe.py b/python/sglang/test/test_cutlass_w4a8_moe.py index 7d96cccd5e0..e75154ef4b3 100644 --- a/python/sglang/test/test_cutlass_w4a8_moe.py +++ b/python/sglang/test/test_cutlass_w4a8_moe.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -from typing import Literal, Optional +from typing import Optional import pytest import torch diff --git a/python/sglang/test/test_marlin_moe.py b/python/sglang/test/test_marlin_moe.py index 77b0109dff7..d58200edd7e 100644 --- a/python/sglang/test/test_marlin_moe.py +++ b/python/sglang/test/test_marlin_moe.py @@ -1,4 +1,3 @@ -import types from typing import Optional import pytest