Skip to content

Commit bf61512

Browse files
authored
Optimize pow5_12(::Float32) (#493)
1 parent 9710272 commit bf61512

File tree

2 files changed

+18
-3
lines changed

2 files changed

+18
-3
lines changed

src/conversions.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,9 @@ correct_gamut(c::CV) where {CV<:TransparentRGB} =
8181
@inline function srgb_compand(v)
8282
F = typeof(0.5f0v) === Float32 ? Float32 : promote_type(Float64, typeof(v))
8383
vf = F(v)
84+
vc = @fastmath max(vf, F(0.0031308))
8485
# `pow5_12` is an optimized function to get `v^(1/2.4)`
85-
vf > F(0.0031308) ? muladd(F(1.055), F(pow5_12(vf)), F(-0.055)) : F(12.92) * vf
86+
vf > F(0.0031308) ? muladd(F(1.055), F(pow5_12(vc)), F(-0.055)) : F(12.92) * vf
8687
end
8788

8889
function _hsx_to_rgb(im::UInt8, v, n, m)

src/utilities.jl

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ pow3_4(x) = (y = @fastmath(sqrt(x)); y*@fastmath(sqrt(y))) # x^(3/4)
9292

9393
# `pow5_12` is called from `srgb_compand`.
9494
pow5_12(x) = pow3_4(x) / cbrt(x) # 5/12 == 1/2 + 1/4 - 1/3 == 3/4 - 1/3
95-
pow5_12(x::Float32) = Float32(pow5_12(Float64(x)))
9695
@inline function pow5_12(x::Float64)
96+
@noinline _cbrt(x) = cbrt01(x)
9797
p3_4 = pow3_4(x)
9898
# x^(-1/6)
9999
if x < 0.02
@@ -106,7 +106,7 @@ pow5_12(x::Float32) = Float32(pow5_12(Float64(x)))
106106
t0 = @evalpoly(x, 1.7047813285940905, -3.1261253501167308,
107107
7.498744828350077, -10.100319516746419, 6.820601476522508, -1.7978894213531524)
108108
else
109-
return p3_4 / cbrt(x)
109+
return p3_4 / _cbrt(x)
110110
end
111111
# x^(-1/3)
112112
t1 = t0 * t0
@@ -117,6 +117,20 @@ pow5_12(x::Float32) = Float32(pow5_12(Float64(x)))
117117
# x^(3/4) * x^(-1/3)
118118
muladd(p3_4, t2, p3_4 * t2h)
119119
end
120+
@inline function pow5_12(x::Float32)
121+
# x^(-1/3)
122+
rc = rcbrt(x)
123+
rcx = -rc * x
124+
rch = muladd(muladd(rc, x, rcx), -rc^2, muladd(rc^2, rcx, 1.0f0)) # 1 - x * rc^3
125+
rce = muladd(2/9f0, rch, 1/3f0) * rch * rc
126+
# x^(3/4)
127+
p3_4_f64 = pow3_4(Float64(x))
128+
p3_4r = reinterpret(Float64, reinterpret(UInt64, p3_4_f64) & 0xffffffff_e0000000)
129+
p3_4 = Float32(p3_4r)
130+
p3_4e = Float32(p3_4_f64 - p3_4r)
131+
# x^(3/4) * x^(-1/3)
132+
muladd(p3_4, rc, muladd(p3_4, rce, p3_4e * rc))
133+
end
120134

121135
# `pow12_5` is called from `invert_srgb_compand`.
122136
pow12_5(x) = pow12_5(Float64(x))

0 commit comments

Comments
 (0)