@@ -70,11 +70,21 @@ __gpu_kernel void acosfKernel(const float *X, float *Out,
70
70
runKernelBody<__ocml_acos_f32>(NumElements, Out, X);
71
71
}
72
72
73
+ __gpu_kernel void acosf16Kernel (const float16 *X, float16 *Out,
74
+ size_t NumElements) noexcept {
75
+ runKernelBody<__ocml_acos_f16>(NumElements, Out, X);
76
+ }
77
+
73
78
// acosh kernel over float data: forwards the element count, the output
// pointer and the input pointer to runKernelBody instantiated with
// __ocml_acosh_f32.
__gpu_kernel void acoshfKernel(
    const float *X,
    float *Out,
    size_t NumElements) noexcept {
  runKernelBody<__ocml_acosh_f32>(NumElements, Out, X);
}
77
82
83
+ __gpu_kernel void acoshf16Kernel (const float16 *X, float16 *Out,
84
+ size_t NumElements) noexcept {
85
+ runKernelBody<__ocml_acosh_f16>(NumElements, Out, X);
86
+ }
87
+
78
88
__gpu_kernel void asinKernel (const double *X, double *Out,
79
89
size_t NumElements) noexcept {
80
90
runKernelBody<__ocml_asin_f64>(NumElements, Out, X);
@@ -85,16 +95,31 @@ __gpu_kernel void asinfKernel(const float *X, float *Out,
85
95
runKernelBody<__ocml_asin_f32>(NumElements, Out, X);
86
96
}
87
97
98
+ __gpu_kernel void asinf16Kernel (const float16 *X, float16 *Out,
99
+ size_t NumElements) noexcept {
100
+ runKernelBody<__ocml_asin_f16>(NumElements, Out, X);
101
+ }
102
+
88
103
// asinh kernel over float data: forwards the element count, the output
// pointer and the input pointer to runKernelBody instantiated with
// __ocml_asinh_f32.
__gpu_kernel void asinhfKernel(
    const float *X,
    float *Out,
    size_t NumElements) noexcept {
  runKernelBody<__ocml_asinh_f32>(NumElements, Out, X);
}
92
107
108
+ __gpu_kernel void asinhf16Kernel (const float16 *X, float16 *Out,
109
+ size_t NumElements) noexcept {
110
+ runKernelBody<__ocml_asinh_f16>(NumElements, Out, X);
111
+ }
112
+
93
113
// atan kernel over float data: forwards the element count, the output
// pointer and the input pointer to runKernelBody instantiated with
// __ocml_atan_f32.
__gpu_kernel void atanfKernel(
    const float *X,
    float *Out,
    size_t NumElements) noexcept {
  runKernelBody<__ocml_atan_f32>(NumElements, Out, X);
}
97
117
118
+ __gpu_kernel void atanf16Kernel (const float16 *X, float16 *Out,
119
+ size_t NumElements) noexcept {
120
+ runKernelBody<__ocml_atan_f16>(NumElements, Out, X);
121
+ }
122
+
98
123
__gpu_kernel void atan2fKernel (const float *X, const float *Y, float *Out,
99
124
size_t NumElements) noexcept {
100
125
runKernelBody<__ocml_atan2_f32>(NumElements, Out, X, Y);
@@ -105,6 +130,11 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
105
130
runKernelBody<__ocml_atanh_f32>(NumElements, Out, X);
106
131
}
107
132
133
+ __gpu_kernel void atanhf16Kernel (const float16 *X, float16 *Out,
134
+ size_t NumElements) noexcept {
135
+ runKernelBody<__ocml_atanh_f16>(NumElements, Out, X);
136
+ }
137
+
108
138
__gpu_kernel void cbrtKernel (const double *X, double *Out,
109
139
size_t NumElements) noexcept {
110
140
runKernelBody<__ocml_cbrt_f64>(NumElements, Out, X);
@@ -125,11 +155,21 @@ __gpu_kernel void cosfKernel(const float *X, float *Out,
125
155
runKernelBody<__ocml_cos_f32>(NumElements, Out, X);
126
156
}
127
157
158
+ __gpu_kernel void cosf16Kernel (const float16 *X, float16 *Out,
159
+ size_t NumElements) noexcept {
160
+ runKernelBody<__ocml_cos_f16>(NumElements, Out, X);
161
+ }
162
+
128
163
// cosh kernel over float data: forwards the element count, the output
// pointer and the input pointer to runKernelBody instantiated with
// __ocml_cosh_f32.
__gpu_kernel void coshfKernel(
    const float *X,
    float *Out,
    size_t NumElements) noexcept {
  runKernelBody<__ocml_cosh_f32>(NumElements, Out, X);
}
132
167
168
+ __gpu_kernel void coshf16Kernel (const float16 *X, float16 *Out,
169
+ size_t NumElements) noexcept {
170
+ runKernelBody<__ocml_cosh_f16>(NumElements, Out, X);
171
+ }
172
+
133
173
__gpu_kernel void cospifKernel (const float *X, float *Out,
134
174
size_t NumElements) noexcept {
135
175
runKernelBody<__ocml_cospi_f32>(NumElements, Out, X);
@@ -150,6 +190,11 @@ __gpu_kernel void expfKernel(const float *X, float *Out,
150
190
runKernelBody<__ocml_exp_f32>(NumElements, Out, X);
151
191
}
152
192
193
+ __gpu_kernel void expf16Kernel (const float16 *X, float16 *Out,
194
+ size_t NumElements) noexcept {
195
+ runKernelBody<__ocml_exp_f16>(NumElements, Out, X);
196
+ }
197
+
153
198
__gpu_kernel void exp10Kernel (const double *X, double *Out,
154
199
size_t NumElements) noexcept {
155
200
runKernelBody<__ocml_exp10_f64>(NumElements, Out, X);
@@ -160,6 +205,11 @@ __gpu_kernel void exp10fKernel(const float *X, float *Out,
160
205
runKernelBody<__ocml_exp10_f32>(NumElements, Out, X);
161
206
}
162
207
208
+ __gpu_kernel void exp10f16Kernel (const float16 *X, float16 *Out,
209
+ size_t NumElements) noexcept {
210
+ runKernelBody<__ocml_exp10_f16>(NumElements, Out, X);
211
+ }
212
+
163
213
__gpu_kernel void exp2Kernel (const double *X, double *Out,
164
214
size_t NumElements) noexcept {
165
215
runKernelBody<__ocml_exp2_f64>(NumElements, Out, X);
@@ -170,6 +220,11 @@ __gpu_kernel void exp2fKernel(const float *X, float *Out,
170
220
runKernelBody<__ocml_exp2_f32>(NumElements, Out, X);
171
221
}
172
222
223
+ __gpu_kernel void exp2f16Kernel (const float16 *X, float16 *Out,
224
+ size_t NumElements) noexcept {
225
+ runKernelBody<__ocml_exp2_f16>(NumElements, Out, X);
226
+ }
227
+
173
228
__gpu_kernel void expm1Kernel (const double *X, double *Out,
174
229
size_t NumElements) noexcept {
175
230
runKernelBody<__ocml_expm1_f64>(NumElements, Out, X);
@@ -180,6 +235,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
180
235
runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
181
236
}
182
237
238
+ __gpu_kernel void expm1f16Kernel (const float16 *X, float16 *Out,
239
+ size_t NumElements) noexcept {
240
+ runKernelBody<__ocml_expm1_f16>(NumElements, Out, X);
241
+ }
242
+
183
243
__gpu_kernel void hypotKernel (const double *X, const double *Y, double *Out,
184
244
size_t NumElements) noexcept {
185
245
runKernelBody<__ocml_hypot_f64>(NumElements, Out, X, Y);
@@ -200,6 +260,11 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
200
260
runKernelBody<__ocml_log_f32>(NumElements, Out, X);
201
261
}
202
262
263
+ __gpu_kernel void logf16Kernel (const float16 *X, float16 *Out,
264
+ size_t NumElements) noexcept {
265
+ runKernelBody<__ocml_log_f16>(NumElements, Out, X);
266
+ }
267
+
203
268
__gpu_kernel void log10Kernel (const double *X, double *Out,
204
269
size_t NumElements) noexcept {
205
270
runKernelBody<__ocml_log10_f64>(NumElements, Out, X);
@@ -210,6 +275,11 @@ __gpu_kernel void log10fKernel(const float *X, float *Out,
210
275
runKernelBody<__ocml_log10_f32>(NumElements, Out, X);
211
276
}
212
277
278
+ __gpu_kernel void log10f16Kernel (const float16 *X, float16 *Out,
279
+ size_t NumElements) noexcept {
280
+ runKernelBody<__ocml_log10_f16>(NumElements, Out, X);
281
+ }
282
+
213
283
__gpu_kernel void log1pKernel (const double *X, double *Out,
214
284
size_t NumElements) noexcept {
215
285
runKernelBody<__ocml_log1p_f64>(NumElements, Out, X);
@@ -230,6 +300,11 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
230
300
runKernelBody<__ocml_log2_f32>(NumElements, Out, X);
231
301
}
232
302
303
+ __gpu_kernel void log2f16Kernel (const float16 *X, float16 *Out,
304
+ size_t NumElements) noexcept {
305
+ runKernelBody<__ocml_log2_f16>(NumElements, Out, X);
306
+ }
307
+
233
308
__gpu_kernel void powfKernel (const float *X, float *Y, float *Out,
234
309
size_t NumElements) noexcept {
235
310
runKernelBody<__ocml_pow_f32>(NumElements, Out, X, Y);
@@ -251,6 +326,11 @@ __gpu_kernel void sinfKernel(const float *X, float *Out,
251
326
runKernelBody<__ocml_sin_f32>(NumElements, Out, X);
252
327
}
253
328
329
+ __gpu_kernel void sinf16Kernel (const float16 *X, float16 *Out,
330
+ size_t NumElements) noexcept {
331
+ runKernelBody<__ocml_sin_f16>(NumElements, Out, X);
332
+ }
333
+
254
334
__gpu_kernel void sincosSinKernel (const double *X, double *Out,
255
335
size_t NumElements) noexcept {
256
336
runKernelBody<sincosSin>(NumElements, Out, X);
@@ -276,6 +356,11 @@ __gpu_kernel void sinhfKernel(const float *X, float *Out,
276
356
runKernelBody<__ocml_sinh_f32>(NumElements, Out, X);
277
357
}
278
358
359
+ __gpu_kernel void sinhf16Kernel (const float16 *X, float16 *Out,
360
+ size_t NumElements) noexcept {
361
+ runKernelBody<__ocml_sinh_f16>(NumElements, Out, X);
362
+ }
363
+
279
364
__gpu_kernel void sinpifKernel (const float *X, float *Out,
280
365
size_t NumElements) noexcept {
281
366
runKernelBody<__ocml_sinpi_f32>(NumElements, Out, X);
@@ -291,10 +376,20 @@ __gpu_kernel void tanfKernel(const float *X, float *Out,
291
376
runKernelBody<__ocml_tan_f32>(NumElements, Out, X);
292
377
}
293
378
379
+ __gpu_kernel void tanf16Kernel (const float16 *X, float16 *Out,
380
+ size_t NumElements) noexcept {
381
+ runKernelBody<__ocml_tan_f16>(NumElements, Out, X);
382
+ }
383
+
294
384
// tanh kernel over float data: forwards the element count, the output
// pointer and the input pointer to runKernelBody instantiated with
// __ocml_tanh_f32.
__gpu_kernel void tanhfKernel(
    const float *X,
    float *Out,
    size_t NumElements) noexcept {
  runKernelBody<__ocml_tanh_f32>(NumElements, Out, X);
}
388
+
389
+ __gpu_kernel void tanhf16Kernel (const float16 *X, float16 *Out,
390
+ size_t NumElements) noexcept {
391
+ runKernelBody<__ocml_tanh_f16>(NumElements, Out, X);
392
+ }
298
393
} // extern "C"
299
394
300
395
#endif // HIP_MATH_FOUND
0 commit comments