Skip to content

Commit 445aa30

Browse files
committed
Add docstrings for all layers.
1 parent 5651423 commit 445aa30

File tree

2 files changed

+129
-4
lines changed

2 files changed

+129
-4
lines changed

keras_hub/src/models/depth_anything/depth_anything_layers.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,29 @@
66

77

88
class DepthAnythingTokenToImage(layers.Layer):
9+
"""A layer that converts tokens into images.
10+
11+
Args:
12+
hidden_dim: int. The number of units in the hidden layers.
13+
patch_height: int. The height of each patch.
14+
patch_width: int. The width of each patch.
15+
num_cls_tokens: int. The number of class tokens at the beginning of
16+
the sequence. Defaults to `1`.
17+
num_register_tokens: int. The number of register tokens after the
18+
class tokens. Defaults to `0`.
19+
data_format: `None` or str. If specified, either `"channels_last"` or
20+
`"channels_first"`. The ordering of the dimensions in the
21+
inputs. `"channels_last"` corresponds to inputs with shape
22+
`(batch_size, height, width, channels)`
23+
while `"channels_first"` corresponds to inputs with shape
24+
`(batch_size, channels, height, width)`. It defaults to the
25+
`image_data_format` value found in your Keras config file at
26+
`~/.keras/keras.json`. If you never set it, then it will be
27+
`"channels_last"`.
28+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
29+
including `name`, `dtype` etc.
30+
"""
31+
932
def __init__(
1033
self,
1134
hidden_dim,
@@ -65,6 +88,26 @@ def compute_output_shape(self, input_shape):
6588

6689

6790
class DepthAnythingReassembleLayer(layers.Layer):
91+
"""A layer that resizes the input images.
92+
93+
Args:
94+
hidden_dim: int. The number of units in the hidden layers.
95+
factor: float. The resizing factor. If `factor > 1`, the layer upsamples
96+
the input. If `factor < 1`, the layer downsamples the input. If
97+
`factor == 1`, the layer only applies a linear projection.
98+
data_format: `None` or str. If specified, either `"channels_last"` or
99+
`"channels_first"`. The ordering of the dimensions in the
100+
inputs. `"channels_last"` corresponds to inputs with shape
101+
`(batch_size, height, width, channels)`
102+
while `"channels_first"` corresponds to inputs with shape
103+
`(batch_size, channels, height, width)`. It defaults to the
104+
`image_data_format` value found in your Keras config file at
105+
`~/.keras/keras.json`. If you never set it, then it will be
106+
`"channels_last"`.
107+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
108+
including `name`, `dtype` etc.
109+
"""
110+
68111
def __init__(self, hidden_dim, factor, data_format=None, **kwargs):
69112
super().__init__(**kwargs)
70113
self.hidden_dim = int(hidden_dim)
@@ -152,6 +195,23 @@ def compute_output_shape(self, input_shape):
152195

153196

154197
class DepthAnythingPreActResidualLayer(layers.Layer):
198+
"""A pre-activation residual layer built from ReLU + Conv2D.
199+
200+
Args:
201+
hidden_dim: int. The number of units in the hidden layers.
202+
data_format: `None` or str. If specified, either `"channels_last"` or
203+
`"channels_first"`. The ordering of the dimensions in the
204+
inputs. `"channels_last"` corresponds to inputs with shape
205+
`(batch_size, height, width, channels)`
206+
while `"channels_first"` corresponds to inputs with shape
207+
`(batch_size, channels, height, width)`. It defaults to the
208+
`image_data_format` value found in your Keras config file at
209+
`~/.keras/keras.json`. If you never set it, then it will be
210+
`"channels_last"`.
211+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
212+
including `name`, `dtype` etc.
213+
"""
214+
155215
def __init__(self, hidden_dim, data_format=None, **kwargs):
156216
super().__init__(**kwargs)
157217
self.hidden_dim = int(hidden_dim)
@@ -229,6 +289,24 @@ def compute_output_shape(self, input_shape):
229289

230290

231291
class DepthAnythingFeatureFusionLayer(layers.Layer):
292+
"""A layer that fuses the incoming features.
293+
294+
Args:
295+
hidden_dim: int. The number of units in the hidden layers.
296+
size: tuple of ints. The target size of the output feature map.
297+
data_format: `None` or str. If specified, either `"channels_last"` or
298+
`"channels_first"`. The ordering of the dimensions in the
299+
inputs. `"channels_last"` corresponds to inputs with shape
300+
`(batch_size, height, width, channels)`
301+
while `"channels_first"` corresponds to inputs with shape
302+
`(batch_size, channels, height, width)`. It defaults to the
303+
`image_data_format` value found in your Keras config file at
304+
`~/.keras/keras.json`. If you never set it, then it will be
305+
`"channels_last"`.
306+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
307+
including `name`, `dtype` etc.
308+
"""
309+
232310
def __init__(self, hidden_dim, size, data_format=None, **kwargs):
233311
super().__init__(**kwargs)
234312
self.hidden_dim = int(hidden_dim)
@@ -301,6 +379,33 @@ def compute_output_shape(self, input_shape):
301379

302380

303381
class DepthAnythingNeck(layers.Layer):
382+
"""A DepthAnything neck layer.
383+
384+
Args:
385+
patch_size: int. The size of one side of each patch.
386+
image_size: tuple of ints. The (height, width) of the input images.
387+
backbone_hidden_dim: int. The number of units in the backbone layers.
388+
neck_hidden_dims: list of ints. The number of units in each neck layer.
389+
reassemble_factors: list of floats. The resizing factor in each neck
390+
layer.
391+
fusion_hidden_dim: int. The number of units in the fusion layers.
392+
num_cls_tokens: int. The number of class tokens at the beginning of
393+
the sequence. Defaults to `1`.
394+
num_register_tokens: int. The number of register tokens after the
395+
class tokens. Defaults to `0`.
396+
data_format: `None` or str. If specified, either `"channels_last"` or
397+
`"channels_first"`. The ordering of the dimensions in the
398+
inputs. `"channels_last"` corresponds to inputs with shape
399+
`(batch_size, height, width, channels)`
400+
while `"channels_first"` corresponds to inputs with shape
401+
`(batch_size, channels, height, width)`. It defaults to the
402+
`image_data_format` value found in your Keras config file at
403+
`~/.keras/keras.json`. If you never set it, then it will be
404+
`"channels_last"`.
405+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
406+
including `name`, `dtype` etc.
407+
"""
408+
304409
def __init__(
305410
self,
306411
patch_size,
@@ -464,6 +569,30 @@ def get_config(self):
464569

465570

466571
class DepthAnythingDepthEstimationHead(layers.Layer):
572+
"""A DepthAnything depth estimation head layer.
573+
574+
Args:
575+
patch_size: int. The size of one side of each patch.
576+
patch_height: int. The height of each patch.
577+
patch_width: int. The width of each patch.
578+
hidden_dim: int. The number of units in the hidden layers.
579+
fusion_hidden_dim: int. The number of units in the fusion layers.
580+
head_hidden_dim: int. The number of units in the head layers.
581+
head_in_index: int. The index of the feature map to be used as input
582+
to the head.
583+
data_format: `None` or str. If specified, either `"channels_last"` or
584+
`"channels_first"`. The ordering of the dimensions in the
585+
inputs. `"channels_last"` corresponds to inputs with shape
586+
`(batch_size, height, width, channels)`
587+
while `"channels_first"` corresponds to inputs with shape
588+
`(batch_size, channels, height, width)`. It defaults to the
589+
`image_data_format` value found in your Keras config file at
590+
`~/.keras/keras.json`. If you never set it, then it will be
591+
`"channels_last"`.
592+
**kwargs: other keyword arguments passed to `keras.layers.Layer`,
593+
including `name`, `dtype` etc.
594+
"""
595+
467596
def __init__(
468597
self,
469598
patch_size,

tools/checkpoint_conversion/convert_depth_anything_checkpoints.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,22 +62,18 @@ def convert_model(hf_model, dtype=None):
6262
)
6363
image_encoder = DINOV2Backbone(**dinov2_config)
6464
model_config = hf_model.config.to_dict()
65-
image_shape = dinov2_config["image_shape"]
6665
# In KerasHub, the stage names are capitalized.
6766
feature_keys = model_config["backbone_config"]["out_features"]
6867
feature_keys = [key.replace("stage", "Stage") for key in feature_keys]
6968
assert model_config["depth_estimation_type"] == "relative"
7069
assert model_config["max_depth"] in (None, 1.0)
7170
return DepthAnythingBackbone(
7271
image_encoder,
73-
image_encoder.patch_size,
74-
image_encoder.hidden_dim,
7572
reassemble_factors=model_config["reassemble_factors"],
7673
neck_hidden_dims=model_config["neck_hidden_sizes"],
7774
fusion_hidden_dim=model_config["fusion_hidden_size"],
7875
head_hidden_dim=model_config["head_hidden_size"],
7976
head_in_index=model_config["head_in_index"],
80-
image_shape=image_shape,
8177
feature_keys=feature_keys,
8278
dtype=dtype,
8379
)

0 commit comments

Comments
 (0)