
Commit b64eada

removes assertion
1 parent 451f6a5 commit b64eada

File tree

874 files changed: +124796 −7 lines


tmp_build_dir/keras_hub/__init__.py

Whitespace-only changes.

tmp_build_dir/keras_hub/src/__init__.py

Whitespace-only changes.
Lines changed: 45 additions & 0 deletions

@@ -0,0 +1,45 @@

import types

from keras.saving import register_keras_serializable

try:
    import namex
except ImportError:
    namex = None


def maybe_register_serializable(path, symbol):
    if isinstance(path, (list, tuple)):
        # If we have multiple export names, register each of them first. This
        # gives us a backward-compat mapping from the old serialized names to
        # the new class.
        for name in path:
            name = name.split(".")[-1]
            register_keras_serializable(package="keras_nlp", name=name)(symbol)
            register_keras_serializable(package="keras_hub", name=name)(symbol)
    if isinstance(symbol, types.FunctionType) or hasattr(symbol, "get_config"):
        # We register twice, first with keras_nlp, second with keras_hub,
        # so loading still works for classes saved as "keras_nlp".
        register_keras_serializable(package="keras_nlp")(symbol)
        register_keras_serializable(package="keras_hub")(symbol)


if namex:

    class keras_hub_export(namex.export):
        def __init__(self, path):
            super().__init__(package="keras_hub", path=path)

        def __call__(self, symbol):
            maybe_register_serializable(self.path, symbol)
            return super().__call__(symbol)

else:

    class keras_hub_export:
        def __init__(self, path):
            self.path = path

        def __call__(self, symbol):
            maybe_register_serializable(self.path, symbol)
            return symbol
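For context, a minimal usage sketch of the decorator defined above. `MyLayer` and its export path are hypothetical names, not part of this commit; the point is that a decorated symbol with `get_config` gets registered for serialization under both the legacy `keras_nlp` package and the new `keras_hub` package, so models saved with the old prefix keep loading.

```python
import keras

from keras_hub.src.api_export import keras_hub_export


# Illustrative only: `MyLayer` is a hypothetical class. Because it defines
# `get_config`, the decorator registers it for Keras serialization under
# both the "keras_nlp" and "keras_hub" packages.
@keras_hub_export("keras_hub.layers.MyLayer")
class MyLayer(keras.layers.Layer):
    def get_config(self):
        return super().get_config()
```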

tmp_build_dir/keras_hub/src/layers/__init__.py

Whitespace-only changes.

tmp_build_dir/keras_hub/src/layers/modeling/__init__.py

Whitespace-only changes.
Lines changed: 130 additions & 0 deletions

@@ -0,0 +1,130 @@

import math

import keras
from keras import ops

from keras_hub.src.api_export import keras_hub_export


@keras_hub_export("keras_hub.layers.AlibiBias")
class AlibiBias(keras.layers.Layer):
    """A layer that adds the alibi bias to attention scores.

    This layer adds the alibi bias to the attention scores. Alibi bias is a
    linear, non-learned bias, defined and formalized in
    [Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation](https://arxiv.org/abs/2108.12409).

    This layer takes the attention scores as input and returns the attention
    scores after adding the alibi bias to them. The output has the same
    shape as the input.

    Args:
        alibi_bias_max: int. This value is used to compute the slope of
            each head. The heads' slopes form a geometric sequence that
            starts at `2**(-alibi_bias_max/num_heads)` and uses that same
            value as its ratio. Defaults to 8.
        **kwargs: other keyword arguments passed to `keras.layers.Layer`,
            including `name`, `trainable`, `dtype` etc.

    Call arguments:
        attention_scores: The result of multiplying the query and the key of
            the multi-head attention layer of the transformer, to which the
            alibi bias is added. With shape
            `(batch_size, num_heads, query_length, key_length)`.

    Example:
    ```python
    query_length = 10
    key_length = 10
    num_heads = 4
    batch_size = 2
    hidden_dim = 8

    # Create new alibi layer.
    alibi_layer = keras_hub.layers.AlibiBias()

    query = np.zeros((batch_size, num_heads, query_length, hidden_dim))
    key = np.zeros((batch_size, num_heads, hidden_dim, key_length))

    attention_scores = keras.ops.matmul(query, key)

    # Add alibi bias to attention scores.
    attention_scores = alibi_layer(attention_scores)
    ```

    References:
        - [Press et al., 2021](https://arxiv.org/abs/2108.12409)
    """

    def __init__(
        self,
        alibi_bias_max=8,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.alibi_bias_max = alibi_bias_max

    def call(self, attention_scores):
        shape = ops.shape(attention_scores)
        if len(shape) != 4:
            raise ValueError(
                "Expected `attention_scores` shape to be "
                "`(batch_size, num_heads, query_length, key_length)`."
                f" Received shape={shape}"
            )

        key_length = shape[-1]
        num_heads = shape[-3]

        alibi_bias = self._get_alibi_bias(num_heads, key_length)

        return ops.add(attention_scores, alibi_bias)

    def _get_alibi_bias(self, num_heads, key_length):
        slopes = ops.convert_to_tensor(
            self._get_slopes(num_heads), dtype=self.compute_dtype
        )
        slopes = ops.expand_dims(slopes, 1)

        seq_range = ops.expand_dims(
            ops.arange(1 - key_length, 1, dtype="int32"), 0
        )
        seq_range = ops.cast(seq_range, dtype=self.compute_dtype)

        alibi_bias = ops.multiply(slopes, seq_range)
        alibi_bias = ops.expand_dims(alibi_bias, 1)

        # Return shape is `(1, num_heads, 1, key_length)`.
        return ops.expand_dims(alibi_bias, 0)

    def _get_slopes(self, num_heads):
        # This function is adapted from the original alibi implementation.
        # https://github.com/ofirpress/attention_with_linear_biases/blob/a35aaca144e0eb6b789dfcb46784c4b8e31b7983/fairseq/models/transformer.py#L742
        def get_slopes_power_of_2(n):
            start = 2 ** (
                -(2 ** -(math.log2(n) - math.log2(self.alibi_bias_max)))
            )
            ratio = start
            return [start * ratio**i for i in range(n)]

        if math.log2(num_heads).is_integer():
            return get_slopes_power_of_2(num_heads)
        else:
            closest_power_of_2 = 2 ** math.floor(math.log2(num_heads))
            return (
                get_slopes_power_of_2(closest_power_of_2)
                + self._get_slopes(2 * closest_power_of_2)[0::2][
                    : num_heads - closest_power_of_2
                ]
            )

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = super().get_config()
        config.update(
            {
                "alibi_bias_max": self.alibi_bias_max,
            }
        )
        return config
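As a rough reading aid for `_get_slopes`, here is a standalone sketch (plain Python, no Keras, not part of the commit) of its power-of-two branch under the default `alibi_bias_max=8`; the function name is illustrative.

```python
import math


def alibi_slopes_power_of_2(num_heads, alibi_bias_max=8):
    # Mirrors `get_slopes_power_of_2` above: a geometric sequence whose
    # first term and ratio are both 2**-(alibi_bias_max / num_heads).
    start = 2 ** (-(2 ** -(math.log2(num_heads) - math.log2(alibi_bias_max))))
    return [start * start**i for i in range(num_heads)]


# With 8 heads and the default alibi_bias_max=8, the slope halves per head:
# [0.5, 0.25, 0.125, 0.0625, 0.03125, 0.015625, 0.0078125, 0.00390625]
print(alibi_slopes_power_of_2(8))
```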
Lines changed: 185 additions & 0 deletions

@@ -0,0 +1,185 @@

import keras
from keras import ops
from keras import random

from keras_hub.src.layers.modeling.alibi_bias import AlibiBias
from keras_hub.src.tests.test_case import TestCase


class AlibiBiasTest(TestCase):
    def test_layer_behaviors(self):
        alibi_bias_max = 8
        batch_size = 4
        num_heads = 8
        query_length = 10
        key_length = 10
        self.run_layer_test(
            cls=AlibiBias,
            init_kwargs={
                "alibi_bias_max": alibi_bias_max,
            },
            input_data=random.uniform(
                shape=(batch_size, num_heads, query_length, key_length)
            ),
            expected_output_shape=(
                batch_size,
                num_heads,
                query_length,
                key_length,
            ),
        )

    def test_float16_dtype(self):
        # Create a 4-dimensional input (the first dimension is implicit).
        alibi_bias_max = 8
        num_heads = 8
        query_length = 5
        key_length = 10
        test_layer = AlibiBias(alibi_bias_max=alibi_bias_max, dtype="float16")
        input_tensor = keras.Input(shape=(num_heads, query_length, key_length))
        output_tensor = test_layer(input_tensor)

        # The output is expected to have the same shape as the input in all
        # dimensions; here, the first dimension is implicit and is the batch.
        expected_output_shape = (None, num_heads, query_length, key_length)
        self.assertEqual(expected_output_shape, output_tensor.shape)
        # The output dtype should match the layer's "float16" dtype.
        self.assertEqual("float16", output_tensor.dtype)

    def test_dynamic_layer_output_shape(self):
        query_length = 10
        key_length = 10
        num_heads = 4

        test_layer = AlibiBias()
        # Create a 4-dimensional input (the first dimension is implicit).
        input_tensor = keras.Input(shape=(num_heads, query_length, key_length))
        output_tensor = test_layer(input_tensor)

        # The output is expected to have the same shape as the input in all
        # dimensions.
        expected_output_shape = (
            None,
            num_heads,
            query_length,
            key_length,
        )
        self.assertEqual(expected_output_shape, output_tensor.shape)

    def test_value_error_when_inputs_shape_is_not_4(self):
        with self.assertRaises(ValueError):
            AlibiBias()(random.uniform(shape=(12, 12)))

    def test_num_heads_is_not_power_of_two(self):
        inputs_shape = (1, 12, 12, 12)
        inputs = random.uniform(shape=inputs_shape)
        layer = AlibiBias()
        outputs = layer(inputs)
        self.assertEqual(inputs_shape, outputs.shape)

    def test_correct_output(self):
        batch_size = 1
        num_heads = 8
        query_length = 1
        key_length = 3
        input_shape = (batch_size, num_heads, query_length, key_length)
        input_tensor = ops.zeros(input_shape)
        layer = AlibiBias()
        output_tensor = layer(input_tensor)
        self.assertAllClose(
            output_tensor,
            ops.convert_to_tensor(
                [
                    [
                        [[-1.0, -0.5, 0.0]],
                        [[-0.5, -0.25, 0.0]],
                        [[-0.25, -0.125, 0.0]],
                        [[-0.125, -0.0625, 0.0]],
                        [[-0.0625, -0.03125, 0.0]],
                        [[-0.03125, -0.015625, 0.0]],
                        [[-0.015625, -0.0078125, 0.0]],
                        [[-0.0078125, -0.00390625, 0.0]],
                    ]
                ]
            ),
        )

    def test_correct_output_num_heads_not_power_of_two(self):
        batch_size = 1
        num_heads = 14
        query_length = 1
        key_length = 3
        input_shape = (batch_size, num_heads, query_length, key_length)
        input_tensor = ops.zeros(input_shape)
        layer = AlibiBias()
        output_tensor = layer(input_tensor)
        self.assertAllClose(
            output_tensor,
            ops.convert_to_tensor(
                [
                    [
                        [[-1.0, -0.5, 0.0]],
                        [[-0.5, -0.25, 0.0]],
                        [[-0.25, -0.125, 0.0]],
                        [[-0.125, -0.0625, 0.0]],
                        [[-0.0625, -0.03125, 0.0]],
                        [[-0.03125, -0.015625, 0.0]],
                        [[-0.015625, -0.0078125, 0.0]],
                        [[-0.0078125, -0.00390625, 0.0]],
                        [[-1.4142135, -0.70710677, 0.0]],
                        [[-0.70710677, -0.35355338, 0.0]],
                        [[-0.35355338, -0.17677669, 0.0]],
                        [[-0.17677669, -0.08838835, 0.0]],
                        [[-0.08838835, -0.04419417, 0.0]],
                        [[-0.04419417, -0.02209709, 0.0]],
                    ]
                ]
            ),
        )

    def test_correct_output_alibi_bias_max(self):
        alibi_bias_max = 12
        batch_size = 1
        num_heads = 2
        query_length = 1
        key_length = 3
        input_shape = (batch_size, num_heads, query_length, key_length)
        input_tensor = ops.zeros(input_shape)
        layer = AlibiBias(alibi_bias_max=alibi_bias_max)
        output_tensor = layer(input_tensor)
        self.assertAllClose(
            output_tensor,
            ops.convert_to_tensor(
                [
                    [
                        [[-0.03125, -0.015625, 0.0]],
                        [[-0.00048828, -0.00024414, 0.0]],
                    ]
                ]
            ),
        )

    def test_correct_output_alibi_bias_max_num_heads_not_power_of_two(
        self,
    ):
        alibi_bias_max = 6
        batch_size = 1
        num_heads = 3
        query_length = 1
        key_length = 3
        input_shape = (batch_size, num_heads, query_length, key_length)
        input_tensor = ops.zeros(input_shape)
        layer = AlibiBias(alibi_bias_max=alibi_bias_max)
        output_tensor = layer(input_tensor)
        self.assertAllClose(
            output_tensor,
            ops.convert_to_tensor(
                [
                    [
                        [[-0.25, -0.125, 0.0]],
                        [[-0.03125, -0.015625, 0.0]],
                        [[-0.70710677, -0.35355338, 0.0]],
                    ]
                ]
            ),
        )
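A quick sanity check on the hard-coded expectations in `test_correct_output`: with the default `alibi_bias_max=8` and `num_heads=8`, the first head's slope is `2**-1 = 0.5`, and for `key_length=3` the bias is that slope times the range `[-2, -1, 0]`. The sketch below (NumPy only, not part of the commit) reproduces the first expected row.

```python
import numpy as np

# Head 0 of 8 heads with alibi_bias_max=8 has slope 0.5 (see _get_slopes).
slope = 0.5
key_length = 3
seq_range = np.arange(1 - key_length, 1)  # [-2, -1, 0]
print(slope * seq_range)  # [-1.  -0.5  0. ], the first row in test_correct_output
```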
