Skip to content

Commit 5ef4120

Browse files
[Offload][Conformance] Add exhaustive tests for half-precision math functions (#155112)
This patch adds a set of exhaustive tests for half-precision math. The functions included in this set were selected based on the following criteria:
- An implementation exists in `libc/src/math/generic` (i.e., it is not just a wrapper around a compiler built-in).
- The corresponding LLVM CPU libm implementation is correctly rounded.
- The function is listed in Table 69 of the OpenCL C Specification v3.0.19.

This patch also fixes the testing range of the following functions: `acos`, `acosf`, `asin`, `asinf`, and `log1p`.
1 parent 11c6158 commit 5ef4120

32 files changed

+1644
-5
lines changed

offload/unittests/Conformance/device_code/DeviceAPIs.hpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,49 +103,68 @@ extern "C" {
103103

104104
double __ocml_acos_f64(double);
105105
float __ocml_acos_f32(float);
106+
float16 __ocml_acos_f16(float16);
106107
float __ocml_acosh_f32(float);
108+
float16 __ocml_acosh_f16(float16);
107109
double __ocml_asin_f64(double);
108110
float __ocml_asin_f32(float);
111+
float16 __ocml_asin_f16(float16);
109112
float __ocml_asinh_f32(float);
113+
float16 __ocml_asinh_f16(float16);
110114
float __ocml_atan_f32(float);
115+
float16 __ocml_atan_f16(float16);
111116
float __ocml_atan2_f32(float, float);
112117
float __ocml_atanh_f32(float);
118+
float16 __ocml_atanh_f16(float16);
113119
double __ocml_cbrt_f64(double);
114120
float __ocml_cbrt_f32(float);
115121
double __ocml_cos_f64(double);
116122
float __ocml_cos_f32(float);
123+
float16 __ocml_cos_f16(float16);
117124
float __ocml_cosh_f32(float);
125+
float16 __ocml_cosh_f16(float16);
118126
float __ocml_cospi_f32(float);
119127
float __ocml_erf_f32(float);
120128
double __ocml_exp_f64(double);
121129
float __ocml_exp_f32(float);
130+
float16 __ocml_exp_f16(float16);
122131
double __ocml_exp10_f64(double);
123132
float __ocml_exp10_f32(float);
133+
float16 __ocml_exp10_f16(float16);
124134
double __ocml_exp2_f64(double);
125135
float __ocml_exp2_f32(float);
136+
float16 __ocml_exp2_f16(float16);
126137
double __ocml_expm1_f64(double);
127138
float __ocml_expm1_f32(float);
139+
float16 __ocml_expm1_f16(float16);
128140
double __ocml_hypot_f64(double, double);
129141
float __ocml_hypot_f32(float, float);
130142
double __ocml_log_f64(double);
131143
float __ocml_log_f32(float);
144+
float16 __ocml_log_f16(float16);
132145
double __ocml_log10_f64(double);
133146
float __ocml_log10_f32(float);
147+
float16 __ocml_log10_f16(float16);
134148
double __ocml_log1p_f64(double);
135149
float __ocml_log1p_f32(float);
136150
double __ocml_log2_f64(double);
137151
float __ocml_log2_f32(float);
152+
float16 __ocml_log2_f16(float16);
138153
float __ocml_pow_f32(float, float);
139154
float __ocml_round_f32(float);
140155
double __ocml_sin_f64(double);
141156
float __ocml_sin_f32(float);
157+
float16 __ocml_sin_f16(float16);
142158
double __ocml_sincos_f64(double, double *);
143159
float __ocml_sincos_f32(float, float *);
144160
float __ocml_sinh_f32(float);
161+
float16 __ocml_sinh_f16(float16);
145162
float __ocml_sinpi_f32(float);
146163
double __ocml_tan_f64(double);
147164
float __ocml_tan_f32(float);
165+
float16 __ocml_tan_f16(float16);
148166
float __ocml_tanh_f32(float);
167+
float16 __ocml_tanh_f16(float16);
149168
} // extern "C"
150169

151170
#endif // HIP_MATH_FOUND

offload/unittests/Conformance/device_code/HIPMath.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,21 @@ __gpu_kernel void acosfKernel(const float *X, float *Out,
7070
runKernelBody<__ocml_acos_f32>(NumElements, Out, X);
7171
}
7272

73+
__gpu_kernel void acosf16Kernel(const float16 *X, float16 *Out,
74+
size_t NumElements) noexcept {
75+
runKernelBody<__ocml_acos_f16>(NumElements, Out, X);
76+
}
77+
7378
__gpu_kernel void acoshfKernel(const float *X, float *Out,
7479
size_t NumElements) noexcept {
7580
runKernelBody<__ocml_acosh_f32>(NumElements, Out, X);
7681
}
7782

83+
__gpu_kernel void acoshf16Kernel(const float16 *X, float16 *Out,
84+
size_t NumElements) noexcept {
85+
runKernelBody<__ocml_acosh_f16>(NumElements, Out, X);
86+
}
87+
7888
__gpu_kernel void asinKernel(const double *X, double *Out,
7989
size_t NumElements) noexcept {
8090
runKernelBody<__ocml_asin_f64>(NumElements, Out, X);
@@ -85,16 +95,31 @@ __gpu_kernel void asinfKernel(const float *X, float *Out,
8595
runKernelBody<__ocml_asin_f32>(NumElements, Out, X);
8696
}
8797

98+
__gpu_kernel void asinf16Kernel(const float16 *X, float16 *Out,
99+
size_t NumElements) noexcept {
100+
runKernelBody<__ocml_asin_f16>(NumElements, Out, X);
101+
}
102+
88103
__gpu_kernel void asinhfKernel(const float *X, float *Out,
89104
size_t NumElements) noexcept {
90105
runKernelBody<__ocml_asinh_f32>(NumElements, Out, X);
91106
}
92107

108+
__gpu_kernel void asinhf16Kernel(const float16 *X, float16 *Out,
109+
size_t NumElements) noexcept {
110+
runKernelBody<__ocml_asinh_f16>(NumElements, Out, X);
111+
}
112+
93113
__gpu_kernel void atanfKernel(const float *X, float *Out,
94114
size_t NumElements) noexcept {
95115
runKernelBody<__ocml_atan_f32>(NumElements, Out, X);
96116
}
97117

118+
__gpu_kernel void atanf16Kernel(const float16 *X, float16 *Out,
119+
size_t NumElements) noexcept {
120+
runKernelBody<__ocml_atan_f16>(NumElements, Out, X);
121+
}
122+
98123
__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out,
99124
size_t NumElements) noexcept {
100125
runKernelBody<__ocml_atan2_f32>(NumElements, Out, X, Y);
@@ -105,6 +130,11 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
105130
runKernelBody<__ocml_atanh_f32>(NumElements, Out, X);
106131
}
107132

133+
__gpu_kernel void atanhf16Kernel(const float16 *X, float16 *Out,
134+
size_t NumElements) noexcept {
135+
runKernelBody<__ocml_atanh_f16>(NumElements, Out, X);
136+
}
137+
108138
__gpu_kernel void cbrtKernel(const double *X, double *Out,
109139
size_t NumElements) noexcept {
110140
runKernelBody<__ocml_cbrt_f64>(NumElements, Out, X);
@@ -125,11 +155,21 @@ __gpu_kernel void cosfKernel(const float *X, float *Out,
125155
runKernelBody<__ocml_cos_f32>(NumElements, Out, X);
126156
}
127157

158+
__gpu_kernel void cosf16Kernel(const float16 *X, float16 *Out,
159+
size_t NumElements) noexcept {
160+
runKernelBody<__ocml_cos_f16>(NumElements, Out, X);
161+
}
162+
128163
__gpu_kernel void coshfKernel(const float *X, float *Out,
129164
size_t NumElements) noexcept {
130165
runKernelBody<__ocml_cosh_f32>(NumElements, Out, X);
131166
}
132167

168+
__gpu_kernel void coshf16Kernel(const float16 *X, float16 *Out,
169+
size_t NumElements) noexcept {
170+
runKernelBody<__ocml_cosh_f16>(NumElements, Out, X);
171+
}
172+
133173
__gpu_kernel void cospifKernel(const float *X, float *Out,
134174
size_t NumElements) noexcept {
135175
runKernelBody<__ocml_cospi_f32>(NumElements, Out, X);
@@ -150,6 +190,11 @@ __gpu_kernel void expfKernel(const float *X, float *Out,
150190
runKernelBody<__ocml_exp_f32>(NumElements, Out, X);
151191
}
152192

193+
__gpu_kernel void expf16Kernel(const float16 *X, float16 *Out,
194+
size_t NumElements) noexcept {
195+
runKernelBody<__ocml_exp_f16>(NumElements, Out, X);
196+
}
197+
153198
__gpu_kernel void exp10Kernel(const double *X, double *Out,
154199
size_t NumElements) noexcept {
155200
runKernelBody<__ocml_exp10_f64>(NumElements, Out, X);
@@ -160,6 +205,11 @@ __gpu_kernel void exp10fKernel(const float *X, float *Out,
160205
runKernelBody<__ocml_exp10_f32>(NumElements, Out, X);
161206
}
162207

208+
__gpu_kernel void exp10f16Kernel(const float16 *X, float16 *Out,
209+
size_t NumElements) noexcept {
210+
runKernelBody<__ocml_exp10_f16>(NumElements, Out, X);
211+
}
212+
163213
__gpu_kernel void exp2Kernel(const double *X, double *Out,
164214
size_t NumElements) noexcept {
165215
runKernelBody<__ocml_exp2_f64>(NumElements, Out, X);
@@ -170,6 +220,11 @@ __gpu_kernel void exp2fKernel(const float *X, float *Out,
170220
runKernelBody<__ocml_exp2_f32>(NumElements, Out, X);
171221
}
172222

223+
__gpu_kernel void exp2f16Kernel(const float16 *X, float16 *Out,
224+
size_t NumElements) noexcept {
225+
runKernelBody<__ocml_exp2_f16>(NumElements, Out, X);
226+
}
227+
173228
__gpu_kernel void expm1Kernel(const double *X, double *Out,
174229
size_t NumElements) noexcept {
175230
runKernelBody<__ocml_expm1_f64>(NumElements, Out, X);
@@ -180,6 +235,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
180235
runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
181236
}
182237

238+
__gpu_kernel void expm1f16Kernel(const float16 *X, float16 *Out,
239+
size_t NumElements) noexcept {
240+
runKernelBody<__ocml_expm1_f16>(NumElements, Out, X);
241+
}
242+
183243
__gpu_kernel void hypotKernel(const double *X, const double *Y, double *Out,
184244
size_t NumElements) noexcept {
185245
runKernelBody<__ocml_hypot_f64>(NumElements, Out, X, Y);
@@ -200,6 +260,11 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
200260
runKernelBody<__ocml_log_f32>(NumElements, Out, X);
201261
}
202262

263+
__gpu_kernel void logf16Kernel(const float16 *X, float16 *Out,
264+
size_t NumElements) noexcept {
265+
runKernelBody<__ocml_log_f16>(NumElements, Out, X);
266+
}
267+
203268
__gpu_kernel void log10Kernel(const double *X, double *Out,
204269
size_t NumElements) noexcept {
205270
runKernelBody<__ocml_log10_f64>(NumElements, Out, X);
@@ -210,6 +275,11 @@ __gpu_kernel void log10fKernel(const float *X, float *Out,
210275
runKernelBody<__ocml_log10_f32>(NumElements, Out, X);
211276
}
212277

278+
__gpu_kernel void log10f16Kernel(const float16 *X, float16 *Out,
279+
size_t NumElements) noexcept {
280+
runKernelBody<__ocml_log10_f16>(NumElements, Out, X);
281+
}
282+
213283
__gpu_kernel void log1pKernel(const double *X, double *Out,
214284
size_t NumElements) noexcept {
215285
runKernelBody<__ocml_log1p_f64>(NumElements, Out, X);
@@ -230,6 +300,11 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
230300
runKernelBody<__ocml_log2_f32>(NumElements, Out, X);
231301
}
232302

303+
__gpu_kernel void log2f16Kernel(const float16 *X, float16 *Out,
304+
size_t NumElements) noexcept {
305+
runKernelBody<__ocml_log2_f16>(NumElements, Out, X);
306+
}
307+
233308
__gpu_kernel void powfKernel(const float *X, float *Y, float *Out,
234309
size_t NumElements) noexcept {
235310
runKernelBody<__ocml_pow_f32>(NumElements, Out, X, Y);
@@ -251,6 +326,11 @@ __gpu_kernel void sinfKernel(const float *X, float *Out,
251326
runKernelBody<__ocml_sin_f32>(NumElements, Out, X);
252327
}
253328

329+
__gpu_kernel void sinf16Kernel(const float16 *X, float16 *Out,
330+
size_t NumElements) noexcept {
331+
runKernelBody<__ocml_sin_f16>(NumElements, Out, X);
332+
}
333+
254334
__gpu_kernel void sincosSinKernel(const double *X, double *Out,
255335
size_t NumElements) noexcept {
256336
runKernelBody<sincosSin>(NumElements, Out, X);
@@ -276,6 +356,11 @@ __gpu_kernel void sinhfKernel(const float *X, float *Out,
276356
runKernelBody<__ocml_sinh_f32>(NumElements, Out, X);
277357
}
278358

359+
__gpu_kernel void sinhf16Kernel(const float16 *X, float16 *Out,
360+
size_t NumElements) noexcept {
361+
runKernelBody<__ocml_sinh_f16>(NumElements, Out, X);
362+
}
363+
279364
__gpu_kernel void sinpifKernel(const float *X, float *Out,
280365
size_t NumElements) noexcept {
281366
runKernelBody<__ocml_sinpi_f32>(NumElements, Out, X);
@@ -291,10 +376,20 @@ __gpu_kernel void tanfKernel(const float *X, float *Out,
291376
runKernelBody<__ocml_tan_f32>(NumElements, Out, X);
292377
}
293378

379+
__gpu_kernel void tanf16Kernel(const float16 *X, float16 *Out,
380+
size_t NumElements) noexcept {
381+
runKernelBody<__ocml_tan_f16>(NumElements, Out, X);
382+
}
383+
294384
__gpu_kernel void tanhfKernel(const float *X, float *Out,
295385
size_t NumElements) noexcept {
296386
runKernelBody<__ocml_tanh_f32>(NumElements, Out, X);
297387
}
388+
389+
__gpu_kernel void tanhf16Kernel(const float16 *X, float16 *Out,
390+
size_t NumElements) noexcept {
391+
runKernelBody<__ocml_tanh_f16>(NumElements, Out, X);
392+
}
298393
} // extern "C"
299394

300395
#endif // HIP_MATH_FOUND

0 commit comments

Comments
 (0)