[feature] lazyimport-and-tokenizer #2462
base: develop
Changes from all commits: b59a7ac, d39b505, 55f5f0f, 2d036cf, 717f219, eb0f4e6, 2fb77e4, 5d58f21, ea633e9, 80d8c46
File 1:
@@ -37,12 +37,6 @@
         "AddedToken",
         "normalize_chars",
         "tokenize_special_chars,convert_to_unicode,",
-        "PreTrainedTokenizer",

Review comment: Why were these removed?

     ],
-    "tokenizer_utils_base": [
-        "PaddingStrategy",
-        "TextInput",
-        "TensorType",
-    ],
     "attention_utils": ["create_bigbird_rand_mask_idx_list"],
     "tensor_parallel_utils": [],
@@ -88,6 +82,11 @@
         "AutoDiscriminator",
         "AutoModelForConditionalGeneration",
     ],
+    "tokenizer_utils_base": [
+        "PaddingStrategy",
+        "TextInput",
+        "TensorType",
+    ],
     "auto.processing": ["AutoProcessor"],
     "auto.tokenizer": ["AutoTokenizer"],
     "deepseek_v2.configuration": ["DeepseekV2Config"],
@@ -320,6 +319,8 @@
         "Qwen3MoePretrainingCriterion",
     ],
     "qwen3_moe.modeling_pp": ["Qwen3MoeForCausalLMPipe"],
+    "ernie4_5vl.tokenizer": ["Ernie4_5_VLTokenizer"],
+    "ernie4_5vl": [],
     "bert": [],
     "llama": [],
     "qwen2": [],
@@ -346,6 +347,7 @@
         tokenize_special_chars,
         convert_to_unicode,
     )
+    from .tokenizer_utils_fast import PretrainedTokenizerFast
     from .processing_utils import ProcessorMixin
     from .feature_extraction_utils import BatchFeature, FeatureExtractionMixin
     from .image_processing_utils import ImageProcessingMixin
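For context on why entries move between these `import_structure` tables: names listed there are not imported at package load time; they are resolved on first attribute access. The sketch below is a minimal, simplified stand-in for the `_LazyModule` pattern (the real `paddleformers.utils.lazy_import._LazyModule` is assumed to carry more machinery, e.g. `TYPE_CHECKING` support and error handling):

```python
# Minimal sketch of a lazy-import module (simplified stand-in, not the real
# paddleformers.utils.lazy_import._LazyModule).
import importlib
import types


class LazyModule(types.ModuleType):
    def __init__(self, name, import_structure):
        super().__init__(name)
        # Map each exported symbol to the submodule that defines it.
        self._symbol_to_module = {
            symbol: submodule
            for submodule, symbols in import_structure.items()
            for symbol in symbols
        }

    def __getattr__(self, name):
        # Called only when normal lookup fails: import the owning submodule
        # on first access, then cache the resolved symbol on the module.
        if name in self._symbol_to_module:
            module = importlib.import_module(
                f"{self.__name__}.{self._symbol_to_module[name]}"
            )
            value = getattr(module, name)
            setattr(self, name, value)  # subsequent lookups skip __getattr__
            return value
        raise AttributeError(f"module {self.__name__!r} has no attribute {name!r}")
```

Under this scheme, moving `tokenizer_utils_base` to a different table only changes which registry owns the names; user-facing imports stay the same.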
File 2:
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
+
+# import logging
 import os
 import warnings
 from typing import Dict, Optional, Union
@@ -140,13 +142,36 @@ def get_paddleformers_tokenizer_config(
     return result


-class AutoTokenizer(hf.AutoTokenizer):
+def _bind_paddle_mixin_if_available(tokenizer_class):

Review comment: I don't quite understand what this is for?

     """
-    Adapted from transformers.AutoTokenizer.from_pretrained with modifications:
-    1. Added get_paddleformers_tokenizer_config() to extend tokenizer_config.json download source
-    2. Explicitly binds PaddleTokenizerMixin to the tokenizer class before final instantiation
+    Bind the PaddleTokenizerMixin if Paddle is available; otherwise, return the original class.

-    Note: This extends HuggingFace's standard tokenizer loading logic with PaddlePaddle integration.
+    Args:
+        tokenizer_class: The original tokenizer class.
+
+    Returns:
+        The tokenizer class bound with PaddleTokenizerMixin, or the original class.
     """
+    try:
+        return type(tokenizer_class.__name__, (PaddleTokenizerMixin, tokenizer_class), {})
+    except:
+        return tokenizer_class
+
+
+class AutoTokenizer(hf.AutoTokenizer):
+    """
+    Smart AutoTokenizer that automatically adapts based on available dependencies:
+
+    1. **Multi-source support**: Supports HuggingFace, PaddleFormers, and other download sources
+    2. **Conditional Paddle integration**: Automatically detects PaddlePaddle availability
+    3. **Fallback compatibility**: Works seamlessly with or without Paddle dependencies
+    4. **Enhanced functionality**: Extends HuggingFace's standard tokenizer loading logic
+
+    Features:
+    - Automatically binds PaddleTokenizerMixin when PaddlePaddle is available
+    - Falls back to pure Transformers mode when PaddlePaddle is not available
+    - Maintains full compatibility with all HuggingFace tokenizers
+    - Supports custom download sources through environment variables
+    """

     @classmethod
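To answer the reviewer's question in spirit: the three-argument `type()` call manufactures a new subclass on the fly whose MRO places `PaddleTokenizerMixin` ahead of the original tokenizer class, so mixin methods override same-named base methods. A toy illustration of the mechanism (hypothetical classes, not the real mixin):

```python
# Toy classes illustrating the dynamic mixin binding; names are hypothetical.
class PaddleTokenizerMixin:
    def save_pretrained(self, path):
        # Paddle-specific behavior layered over the base tokenizer.
        print(f"paddle-aware save to {path}")


class BaseTokenizer:
    def save_pretrained(self, path):
        print(f"plain save to {path}")


# Equivalent to: class BaseTokenizer(PaddleTokenizerMixin, BaseTokenizer): ...
Bound = type(BaseTokenizer.__name__, (PaddleTokenizerMixin, BaseTokenizer), {})

Bound().save_pretrained("/tmp/tok")  # prints "paddle-aware save ..." (mixin wins by MRO)
print([cls.__name__ for cls in Bound.__mro__])
# ['BaseTokenizer', 'PaddleTokenizerMixin', 'BaseTokenizer', 'object']
```

Reusing the original `__name__` keeps error messages and repr output looking like the unmodified class, while the broad `except` in the helper falls back to the original class when the mixin cannot be bound.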
@@ -201,7 +226,9 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):

         if tokenizer_class is None:
             raise ValueError(f"Tokenizer class {tokenizer_class_name} is not currently imported.")
-        tokenizer_class = type(tokenizer_class.__name__, (PaddleTokenizerMixin, tokenizer_class), {})
+
+        # Bind PaddleTokenizerMixin
+        tokenizer_class = _bind_paddle_mixin_if_available(tokenizer_class)

Review comment: ?

         return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

     # Next, let's try to use the tokenizer_config file to get the tokenizer class.
@@ -268,6 +295,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
                 or tokenizer_class_from_name(config_tokenizer_class + "Fast") is not None
             )
         )
+
         if has_remote_code:
             if use_fast and tokenizer_auto_map[1] is not None:
                 class_ref = tokenizer_auto_map[1]
@@ -285,11 +313,14 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
             tokenizer_class = get_class_from_dynamic_module(class_ref, pretrained_model_name_or_path, **kwargs)
             _ = kwargs.pop("code_revision", None)
             tokenizer_class.register_for_auto_class()
-            tokenizer_class = type(tokenizer_class.__name__, (PaddleTokenizerMixin, tokenizer_class), {})
+
+            # Bind PaddleTokenizerMixin
+            tokenizer_class = _bind_paddle_mixin_if_available(tokenizer_class)

Review comment: Same as above?

             return tokenizer_class.from_pretrained(
                 pretrained_model_name_or_path, *inputs, trust_remote_code=trust_remote_code, **kwargs
             )
         elif config_tokenizer_class is not None:
+
             tokenizer_class = None
             if use_fast and not config_tokenizer_class.endswith("Fast"):
                 tokenizer_class_candidate = f"{config_tokenizer_class}Fast"
@@ -301,7 +332,9 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
                 raise ValueError(
                     f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
                 )
-            tokenizer_class = type(tokenizer_class.__name__, (PaddleTokenizerMixin, tokenizer_class), {})
+
+            # Bind PaddleTokenizerMixin
+            tokenizer_class = _bind_paddle_mixin_if_available(tokenizer_class)
             return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)

         # Otherwise we have to be creative.
@@ -321,15 +354,13 @@ def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
         tokenizer_class_py, tokenizer_class_fast = TOKENIZER_MAPPING[type(config)]

         if tokenizer_class_fast and (use_fast or tokenizer_class_py is None):
-            tokenizer_class_fast = type(
-                tokenizer_class_fast.__name__, (PaddleTokenizerMixin, tokenizer_class_fast), {}
-            )
+            # Bind PaddleTokenizerMixin
+            tokenizer_class_fast = _bind_paddle_mixin_if_available(tokenizer_class_fast)
             return tokenizer_class_fast.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
         else:
             if tokenizer_class_py is not None:
-                tokenizer_class_py = type(
-                    tokenizer_class_py.__name__, (PaddleTokenizerMixin, tokenizer_class_py), {}
-                )
+                # Bind PaddleTokenizerMixin
+                tokenizer_class_py = _bind_paddle_mixin_if_available(tokenizer_class_py)
                 return tokenizer_class_py.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
             else:
                 raise ValueError(
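From the caller's perspective the binding is transparent: whichever branch runs, `from_pretrained` returns an instance of the (possibly mixin-extended) class. A hedged usage sketch; the import path and model id are assumptions for illustration:

```python
# Hypothetical caller-side view; import path and model id are assumptions.
from paddleformers.transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")

# With PaddlePaddle installed, PaddleTokenizerMixin appears in the MRO of the
# returned tokenizer's class; without it, this is the plain HF tokenizer class.
print([cls.__name__ for cls in type(tok).__mro__])
```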
File 3 (new file):
@@ -0,0 +1,35 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from typing import TYPE_CHECKING
+
+from ...utils.lazy_import import _LazyModule
+
+import_structure = {
+    "tokenizer": ["Ernie4_5_VLTokenizer"],
+    "configuration": [
+        "Ernie4_5_VLMoEConfig",
+    ],
+}
+
+if TYPE_CHECKING:
+    from .configuration import *
+    from .tokenizer import Ernie4_5_VLTokenizer
+else:
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        import_structure,
+        module_spec=__spec__,
+    )
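With this `__init__.py` in place, importing the package is cheap: the module object is swapped for a `_LazyModule`, and the tokenizer submodule is only executed when one of its names is first touched. A consumer-side sketch (the package path follows the PR; behavior described is the intended lazy-load semantics):

```python
# Importing the subpackage does not execute tokenizer.py or configuration.py.
from paddleformers.transformers import ernie4_5vl

# First attribute access triggers the real import through _LazyModule.
tokenizer_cls = ernie4_5vl.Ernie4_5_VLTokenizer
```

The `if TYPE_CHECKING` branch keeps static analyzers and IDEs working, since they see ordinary imports instead of the runtime module swap.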
Review comment: We need to handle the case where `_get_arch_info` fails to import, e.g. it ends up as `_get_arch_info = None`; the check should then be guarded first, as in `if _get_arch_info is not None and (...)`.
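A minimal sketch of the guard the reviewer is asking for. The import location is an assumption (the diff does not show where `_get_arch_info` comes from), and the second clause stands in for the reviewer's unspecified "(...)" condition:

```python
# Sketch only: import path is hypothetical; adjust to the real module.
try:
    from paddle.device import _get_arch_info  # assumed location
except ImportError:
    _get_arch_info = None  # tolerate environments where the import fails

# Guard before use, per the review comment; short-circuits when unavailable.
if _get_arch_info is not None and _get_arch_info():
    pass  # arch-specific branch goes here
```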