Add docstrings for all layers.

james77777778 · james77777778 · commit 445aa30e3d5b · 2025-09-01T10:36:30.000+08:00
diff --git a/keras_hub/src/models/depth_anything/depth_anything_layers.py b/keras_hub/src/models/depth_anything/depth_anything_layers.py
@@ -6,6 +6,29 @@
 
 
 class DepthAnythingTokenToImage(layers.Layer):
+    """A layer that converts tokens into images.
+
+    Args:
+        hidden_dim: int. The number of units in the hidden layers.
+        patch_height: int. The height of each patch.
+        patch_width: int. The width of each patch.
+        num_cls_tokens: int. The number of class tokens at the beginning of
+            the sequence. Defaults to `1`.
+        num_register_tokens: int. The number of register tokens after the
+            class tokens. Defaults to `0`.
+        data_format: `None` or str. If specified, either `"channels_last"` or
+            `"channels_first"`. The ordering of the dimensions in the
+            inputs. `"channels_last"` corresponds to inputs with shape
+            `(batch_size, height, width, channels)`
+            while `"channels_first"` corresponds to inputs with shape
+            `(batch_size, channels, height, width)`. It defaults to the
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json`. If you never set it, then it will be
+            `"channels_last"`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `dtype` etc.
+    """
+
     def __init__(
         self,
         hidden_dim,
@@ -65,6 +88,26 @@ def compute_output_shape(self, input_shape):
 
 
 class DepthAnythingReassembleLayer(layers.Layer):
+    """A layer that resizes the input images.
+
+    Args:
+        hidden_dim: int. The number of units in the hidden layers.
+        factor: float. The resizing factor. If `factor > 1`, the layer upsamples
+            the input. If `factor < 1`, the layer downsamples the input. If
+            `factor == 1`, the layer only applies a linear projection.
+        data_format: `None` or str. If specified, either `"channels_last"` or
+            `"channels_first"`. The ordering of the dimensions in the
+            inputs. `"channels_last"` corresponds to inputs with shape
+            `(batch_size, height, width, channels)`
+            while `"channels_first"` corresponds to inputs with shape
+            `(batch_size, channels, height, width)`. It defaults to the
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json`. If you never set it, then it will be
+            `"channels_last"`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `dtype` etc.
+    """
+
     def __init__(self, hidden_dim, factor, data_format=None, **kwargs):
         super().__init__(**kwargs)
         self.hidden_dim = int(hidden_dim)
@@ -152,6 +195,23 @@ def compute_output_shape(self, input_shape):
 
 
 class DepthAnythingPreActResidualLayer(layers.Layer):
+    """A ReLU + Conv2D layer.
+
+    Args:
+        hidden_dim: int. The number of units in the hidden layers.
+        data_format: `None` or str. If specified, either `"channels_last"` or
+            `"channels_first"`. The ordering of the dimensions in the
+            inputs. `"channels_last"` corresponds to inputs with shape
+            `(batch_size, height, width, channels)`
+            while `"channels_first"` corresponds to inputs with shape
+            `(batch_size, channels, height, width)`. It defaults to the
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json`. If you never set it, then it will be
+            `"channels_last"`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `dtype` etc.
+    """
+
     def __init__(self, hidden_dim, data_format=None, **kwargs):
         super().__init__(**kwargs)
         self.hidden_dim = int(hidden_dim)
@@ -229,6 +289,24 @@ def compute_output_shape(self, input_shape):
 
 
 class DepthAnythingFeatureFusionLayer(layers.Layer):
+    """A layer that fuses the incoming features.
+
+    Args:
+        hidden_dim: int. The number of units in the hidden layers.
+        size: tuple of int. The target size of the output feature map.
+        data_format: `None` or str. If specified, either `"channels_last"` or
+            `"channels_first"`. The ordering of the dimensions in the
+            inputs. `"channels_last"` corresponds to inputs with shape
+            `(batch_size, height, width, channels)`
+            while `"channels_first"` corresponds to inputs with shape
+            `(batch_size, channels, height, width)`. It defaults to the
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json`. If you never set it, then it will be
+            `"channels_last"`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `dtype` etc.
+    """
+
     def __init__(self, hidden_dim, size, data_format=None, **kwargs):
         super().__init__(**kwargs)
         self.hidden_dim = int(hidden_dim)
@@ -301,6 +379,33 @@ def compute_output_shape(self, input_shape):
 
 
 class DepthAnythingNeck(layers.Layer):
+    """A DepthAnything neck layer.
+
+    Args:
+        patch_size: int. The size of one side of each patch.
+        image_size: tuple of int. The (height, width) of the input images.
+        backbone_hidden_dim: int. The number of units in the backbone layers.
+        neck_hidden_dims: list of int. The number of units in each neck layer.
+        reassemble_factors: list of float. The resizing factor in each neck
+            layer.
+        fusion_hidden_dim: int. The number of units in the fusion layers.
+        num_cls_tokens: int. The number of class tokens at the beginning of
+            the sequence. Defaults to `1`.
+        num_register_tokens: int. The number of register tokens after the
+            class tokens. Defaults to `0`.
+        data_format: `None` or str. If specified, either `"channels_last"` or
+            `"channels_first"`. The ordering of the dimensions in the
+            inputs. `"channels_last"` corresponds to inputs with shape
+            `(batch_size, height, width, channels)`
+            while `"channels_first"` corresponds to inputs with shape
+            `(batch_size, channels, height, width)`. It defaults to the
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json`. If you never set it, then it will be
+            `"channels_last"`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `dtype` etc.
+    """
+
     def __init__(
         self,
         patch_size,
@@ -464,6 +569,30 @@ def get_config(self):
 
 
 class DepthAnythingDepthEstimationHead(layers.Layer):
+    """A DepthAnything depth estimation head layer.
+
+    Args:
+        patch_size: int. The size of one side of each patch.
+        patch_height: int. The height of each patch.
+        patch_width: int. The width of each patch.
+        hidden_dim: int. The number of units in the hidden layers.
+        fusion_hidden_dim: int. The number of units in the fusion layers.
+        head_hidden_dim: int. The number of units in the head layers.
+        head_in_index: int. The index of the feature map to be used as input
+            to the head.
+        data_format: `None` or str. If specified, either `"channels_last"` or
+            `"channels_first"`. The ordering of the dimensions in the
+            inputs. `"channels_last"` corresponds to inputs with shape
+            `(batch_size, height, width, channels)`
+            while `"channels_first"` corresponds to inputs with shape
+            `(batch_size, channels, height, width)`. It defaults to the
+            `image_data_format` value found in your Keras config file at
+            `~/.keras/keras.json`. If you never set it, then it will be
+            `"channels_last"`.
+        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
+            including `name`, `dtype` etc.
+    """
+
     def __init__(
         self,
         patch_size,
diff --git a/tools/checkpoint_conversion/convert_depth_anything_checkpoints.py b/tools/checkpoint_conversion/convert_depth_anything_checkpoints.py
@@ -62,22 +62,18 @@ def convert_model(hf_model, dtype=None):
     )
     image_encoder = DINOV2Backbone(**dinov2_config)
     model_config = hf_model.config.to_dict()
-    image_shape = dinov2_config["image_shape"]
     # In KerasHub, the stage names are capitalized.
     feature_keys = model_config["backbone_config"]["out_features"]
     feature_keys = [key.replace("stage", "Stage") for key in feature_keys]
     assert model_config["depth_estimation_type"] == "relative"
     assert model_config["max_depth"] in (None, 1.0)
     return DepthAnythingBackbone(
         image_encoder,
-        image_encoder.patch_size,
-        image_encoder.hidden_dim,
         reassemble_factors=model_config["reassemble_factors"],
         neck_hidden_dims=model_config["neck_hidden_sizes"],
         fusion_hidden_dim=model_config["fusion_hidden_size"],
         head_hidden_dim=model_config["head_hidden_size"],
         head_in_index=model_config["head_in_index"],
-        image_shape=image_shape,
         feature_keys=feature_keys,
         dtype=dtype,
     )