We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 630cc70 commit de79b8dCopy full SHA for de79b8d
keras_hub/src/models/smollm3/smollm3_backbone.py
@@ -118,16 +118,16 @@ def __init__(
118
)
119
120
cache_update_index = kwargs.get('self_attention_cache_index')
121
- print(cache_update_index)
122
123
start_index = (
124
cache_update_index if cache_update_index is not None else 0
125
126
127
hidden_states = self.token_embedding(token_id_input)
128
- position_embeddings = self.rotary_embedding(hidden_states, start_index=start_index)
+
129
130
for decoder_layer in self.transformer_layers[:num_layers]:
+ position_embeddings = self.rotary_embedding(hidden_states, start_index=start_index)
131
hidden_states = decoder_layer(
132
hidden_states,
133
position_embeddings=position_embeddings,
0 commit comments