-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[Offload][Conformance] Add exhaustive tests for half-precision math functions #155112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-offload Author: Leandro Lacerda (leandrolcampos) Changes: This patch adds a set of exhaustive tests for half-precision math functions. The functions included in this set were selected based on the following criteria:
This patch also fixes the testing range of the following functions: acos, acosf, asin, asinf, and log1p. Patch is 78.15 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/155112.diff 32 Files Affected:
diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
index 32f21991d9ec3..894652a8e1af1 100644
--- a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
+++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
@@ -103,49 +103,68 @@ extern "C" {
double __ocml_acos_f64(double);
float __ocml_acos_f32(float);
+float16 __ocml_acos_f16(float16);
float __ocml_acosh_f32(float);
+float16 __ocml_acosh_f16(float16);
double __ocml_asin_f64(double);
float __ocml_asin_f32(float);
+float16 __ocml_asin_f16(float16);
float __ocml_asinh_f32(float);
+float16 __ocml_asinh_f16(float16);
float __ocml_atan_f32(float);
+float16 __ocml_atan_f16(float16);
float __ocml_atan2_f32(float, float);
float __ocml_atanh_f32(float);
+float16 __ocml_atanh_f16(float16);
double __ocml_cbrt_f64(double);
float __ocml_cbrt_f32(float);
double __ocml_cos_f64(double);
float __ocml_cos_f32(float);
+float16 __ocml_cos_f16(float16);
float __ocml_cosh_f32(float);
+float16 __ocml_cosh_f16(float16);
float __ocml_cospi_f32(float);
float __ocml_erf_f32(float);
double __ocml_exp_f64(double);
float __ocml_exp_f32(float);
+float16 __ocml_exp_f16(float16);
double __ocml_exp10_f64(double);
float __ocml_exp10_f32(float);
+float16 __ocml_exp10_f16(float16);
double __ocml_exp2_f64(double);
float __ocml_exp2_f32(float);
+float16 __ocml_exp2_f16(float16);
double __ocml_expm1_f64(double);
float __ocml_expm1_f32(float);
+float16 __ocml_expm1_f16(float16);
double __ocml_hypot_f64(double, double);
float __ocml_hypot_f32(float, float);
double __ocml_log_f64(double);
float __ocml_log_f32(float);
+float16 __ocml_log_f16(float16);
double __ocml_log10_f64(double);
float __ocml_log10_f32(float);
+float16 __ocml_log10_f16(float16);
double __ocml_log1p_f64(double);
float __ocml_log1p_f32(float);
double __ocml_log2_f64(double);
float __ocml_log2_f32(float);
+float16 __ocml_log2_f16(float16);
float __ocml_pow_f32(float, float);
float __ocml_round_f32(float);
double __ocml_sin_f64(double);
float __ocml_sin_f32(float);
+float16 __ocml_sin_f16(float16);
double __ocml_sincos_f64(double, double *);
float __ocml_sincos_f32(float, float *);
float __ocml_sinh_f32(float);
+float16 __ocml_sinh_f16(float16);
float __ocml_sinpi_f32(float);
double __ocml_tan_f64(double);
float __ocml_tan_f32(float);
+float16 __ocml_tan_f16(float16);
float __ocml_tanh_f32(float);
+float16 __ocml_tanh_f16(float16);
} // extern "C"
#endif // HIP_MATH_FOUND
diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp
index 71dea4c8d2656..7cc0ad5d9142e 100644
--- a/offload/unittests/Conformance/device_code/HIPMath.cpp
+++ b/offload/unittests/Conformance/device_code/HIPMath.cpp
@@ -70,11 +70,21 @@ __gpu_kernel void acosfKernel(const float *X, float *Out,
runKernelBody<__ocml_acos_f32>(NumElements, Out, X);
}
+__gpu_kernel void acosf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_acos_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void acoshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_acosh_f32>(NumElements, Out, X);
}
+__gpu_kernel void acoshf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_acosh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void asinKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_asin_f64>(NumElements, Out, X);
@@ -85,16 +95,31 @@ __gpu_kernel void asinfKernel(const float *X, float *Out,
runKernelBody<__ocml_asin_f32>(NumElements, Out, X);
}
+__gpu_kernel void asinf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_asin_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void asinhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_asinh_f32>(NumElements, Out, X);
}
+__gpu_kernel void asinhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_asinh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void atanfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_atan_f32>(NumElements, Out, X);
}
+__gpu_kernel void atanf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_atan_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_atan2_f32>(NumElements, Out, X, Y);
@@ -105,6 +130,11 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
runKernelBody<__ocml_atanh_f32>(NumElements, Out, X);
}
+__gpu_kernel void atanhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_atanh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void cbrtKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_cbrt_f64>(NumElements, Out, X);
@@ -125,11 +155,21 @@ __gpu_kernel void cosfKernel(const float *X, float *Out,
runKernelBody<__ocml_cos_f32>(NumElements, Out, X);
}
+__gpu_kernel void cosf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cos_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void coshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_cosh_f32>(NumElements, Out, X);
}
+__gpu_kernel void coshf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cosh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void cospifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_cospi_f32>(NumElements, Out, X);
@@ -150,6 +190,11 @@ __gpu_kernel void expfKernel(const float *X, float *Out,
runKernelBody<__ocml_exp_f32>(NumElements, Out, X);
}
+__gpu_kernel void expf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void exp10Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_exp10_f64>(NumElements, Out, X);
@@ -160,6 +205,11 @@ __gpu_kernel void exp10fKernel(const float *X, float *Out,
runKernelBody<__ocml_exp10_f32>(NumElements, Out, X);
}
+__gpu_kernel void exp10f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp10_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void exp2Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_exp2_f64>(NumElements, Out, X);
@@ -170,6 +220,11 @@ __gpu_kernel void exp2fKernel(const float *X, float *Out,
runKernelBody<__ocml_exp2_f32>(NumElements, Out, X);
}
+__gpu_kernel void exp2f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp2_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void expm1Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_expm1_f64>(NumElements, Out, X);
@@ -180,6 +235,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
}
+__gpu_kernel void expm1f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_expm1_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void hypotKernel(const double *X, const double *Y, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_hypot_f64>(NumElements, Out, X, Y);
@@ -200,6 +260,11 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
runKernelBody<__ocml_log_f32>(NumElements, Out, X);
}
+__gpu_kernel void logf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void log10Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log10_f64>(NumElements, Out, X);
@@ -210,6 +275,11 @@ __gpu_kernel void log10fKernel(const float *X, float *Out,
runKernelBody<__ocml_log10_f32>(NumElements, Out, X);
}
+__gpu_kernel void log10f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log10_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void log1pKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log1p_f64>(NumElements, Out, X);
@@ -230,6 +300,11 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
runKernelBody<__ocml_log2_f32>(NumElements, Out, X);
}
+__gpu_kernel void log2f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log2_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void powfKernel(const float *X, float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_pow_f32>(NumElements, Out, X, Y);
@@ -251,6 +326,11 @@ __gpu_kernel void sinfKernel(const float *X, float *Out,
runKernelBody<__ocml_sin_f32>(NumElements, Out, X);
}
+__gpu_kernel void sinf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_sin_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void sincosSinKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<sincosSin>(NumElements, Out, X);
@@ -276,6 +356,11 @@ __gpu_kernel void sinhfKernel(const float *X, float *Out,
runKernelBody<__ocml_sinh_f32>(NumElements, Out, X);
}
+__gpu_kernel void sinhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_sinh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void sinpifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_sinpi_f32>(NumElements, Out, X);
@@ -291,10 +376,20 @@ __gpu_kernel void tanfKernel(const float *X, float *Out,
runKernelBody<__ocml_tan_f32>(NumElements, Out, X);
}
+__gpu_kernel void tanf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_tan_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void tanhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_tanh_f32>(NumElements, Out, X);
}
+
+__gpu_kernel void tanhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_tanh_f16>(NumElements, Out, X);
+}
} // extern "C"
#endif // HIP_MATH_FOUND
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
index baf23a3467f35..8673d809fd0a2 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@@ -69,11 +69,26 @@ __gpu_kernel void acosfKernel(const float *X, float *Out,
runKernelBody<acosf>(NumElements, Out, X);
}
+__gpu_kernel void acosf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<acosf16>(NumElements, Out, X);
+}
+
__gpu_kernel void acoshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<acoshf>(NumElements, Out, X);
}
+__gpu_kernel void acoshf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<acoshf16>(NumElements, Out, X);
+}
+
+__gpu_kernel void acospif16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<acospif16>(NumElements, Out, X);
+}
+
__gpu_kernel void asinKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<asin>(NumElements, Out, X);
@@ -84,16 +99,31 @@ __gpu_kernel void asinfKernel(const float *X, float *Out,
runKernelBody<asinf>(NumElements, Out, X);
}
+__gpu_kernel void asinf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<asinf16>(NumElements, Out, X);
+}
+
__gpu_kernel void asinhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<asinhf>(NumElements, Out, X);
}
+__gpu_kernel void asinhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<asinhf16>(NumElements, Out, X);
+}
+
__gpu_kernel void atanfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<atanf>(NumElements, Out, X);
}
+__gpu_kernel void atanf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<atanf16>(NumElements, Out, X);
+}
+
__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<atan2f>(NumElements, Out, X, Y);
@@ -104,6 +134,11 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
runKernelBody<atanhf>(NumElements, Out, X);
}
+__gpu_kernel void atanhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<atanhf16>(NumElements, Out, X);
+}
+
__gpu_kernel void cbrtKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<cbrt>(NumElements, Out, X);
@@ -124,16 +159,31 @@ __gpu_kernel void cosfKernel(const float *X, float *Out,
runKernelBody<cosf>(NumElements, Out, X);
}
+__gpu_kernel void cosf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<cosf16>(NumElements, Out, X);
+}
+
__gpu_kernel void coshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<coshf>(NumElements, Out, X);
}
+__gpu_kernel void coshf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<coshf16>(NumElements, Out, X);
+}
+
__gpu_kernel void cospifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<cospif>(NumElements, Out, X);
}
+__gpu_kernel void cospif16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<cospif16>(NumElements, Out, X);
+}
+
__gpu_kernel void erffKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<erff>(NumElements, Out, X);
@@ -149,6 +199,11 @@ __gpu_kernel void expfKernel(const float *X, float *Out,
runKernelBody<expf>(NumElements, Out, X);
}
+__gpu_kernel void expf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<expf16>(NumElements, Out, X);
+}
+
__gpu_kernel void exp10Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<exp10>(NumElements, Out, X);
@@ -159,6 +214,11 @@ __gpu_kernel void exp10fKernel(const float *X, float *Out,
runKernelBody<exp10f>(NumElements, Out, X);
}
+__gpu_kernel void exp10f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<exp10f16>(NumElements, Out, X);
+}
+
__gpu_kernel void exp2Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<exp2>(NumElements, Out, X);
@@ -169,6 +229,11 @@ __gpu_kernel void exp2fKernel(const float *X, float *Out,
runKernelBody<exp2f>(NumElements, Out, X);
}
+__gpu_kernel void exp2f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<exp2f16>(NumElements, Out, X);
+}
+
__gpu_kernel void expm1Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<expm1>(NumElements, Out, X);
@@ -179,6 +244,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<expm1f>(NumElements, Out, X);
}
+__gpu_kernel void expm1f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<expm1f16>(NumElements, Out, X);
+}
+
__gpu_kernel void hypotKernel(const double *X, const double *Y, double *Out,
size_t NumElements) noexcept {
runKernelBody<hypot>(NumElements, Out, X, Y);
@@ -204,6 +274,11 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
runKernelBody<logf>(NumElements, Out, X);
}
+__gpu_kernel void logf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<logf16>(NumElements, Out, X);
+}
+
__gpu_kernel void log10Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<log10>(NumElements, Out, X);
@@ -214,6 +289,11 @@ __gpu_kernel void log10fKernel(const float *X, float *Out,
runKernelBody<log10f>(NumElements, Out, X);
}
+__gpu_kernel void log10f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<log10f16>(NumElements, Out, X);
+}
+
__gpu_kernel void log1pKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<log1p>(NumElements, Out, X);
@@ -234,6 +314,11 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
runKernelBody<log2f>(NumElements, Out, X);
}
+__gpu_kernel void log2f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<log2f16>(NumElements, Out, X);
+}
+
__gpu_kernel void powfKernel(const float *X, float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<powf>(NumElements, Out, X, Y);
@@ -255,6 +340,11 @@ __gpu_kernel void sinfKernel(const float *X, float *Out,
runKernelBody<sinf>(NumElements, Out, X);
}
+__gpu_kernel void sinf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sinf16>(NumElements, Out, X);
+}
+
__gpu_kernel void sincosSinKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<sincosSin>(NumElements, Out, X);
@@ -280,11 +370,21 @@ __gpu_kernel void sinhfKernel(const float *X, float *Out,
runKernelBody<sinhf>(NumElements, Out, X);
}
+__gpu_kernel void sinhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sinhf16>(NumElements, Out, X);
+}
+
__gpu_kernel void sinpifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<sinpif>(NumElements, Out, X);
}
+__gpu_kernel void sinpif16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sinpif16>(NumElements, Out, X);
+}
+
__...
[truncated]
|
Exhaustive Test Results for Half-Precision Math Functions
|
This patch adds a set of exhaustive tests for half-precision math.
The functions included in this set were selected based on the following criteria:
libc/src/math/generic (i.e., it is not just a wrapper around a compiler built-in).
This patch also fixes the testing range of the following functions: acos, acosf, asin, asinf, and log1p.