Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 39 additions & 46 deletions mandelbrot/mandelbrot-fast.jl
Original file line number Diff line number Diff line change
@@ -1,74 +1,67 @@
#=
The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

direct transliteration of the swift#3 program by Ralph Ganszky and Daniel Muellenborn:
https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/mandelbrot-swift-3.html
modified for Julia 1.0 by Simon Danisch
=#
# Eight single-precision zeros; an all-zero 8-lane tuple.
const zerov8 = ntuple(_ -> 0f0, 8)

# Perform one update step and return the new `(Zr, Zi, Tr, Ti)`.
#
# The new `Zi` is `2 * Zr * Zi + ci` and the new `Zr` is `Tr - Ti + cr`, both
# computed element-wise from the incoming values; the returned `Tr` and `Ti`
# are the element-wise squares of the new `Zr` and `Zi`.
@inline function step_mandel(Zr,Zi,Tr,Ti,cr,ci)
    next_zi = (2f0 .* Zr .* Zi) .+ ci
    next_zr = (Tr .- Ti) .+ cr
    return next_zr, next_zi, next_zr .* next_zr, next_zi .* next_zi
end
modified for Julia 1.0 by Simon Danisch.
tweaked for performance by https://github.com/maltezfaria and Adam Beckmeyer.
=#
# Eight Float64 zeros, and eight byte masks where masks[i] has every bit set
# except the i-th one counted from the most significant bit.
const zerov8 = ntuple(x-> 0.0, 8)
const masks = (0b01111111, 0b10111111, 0b11011111, 0b11101111, 0b11110111,
0b11111011, 0b11111101, 0b11111110)

# Calculate mandelbrot set for one Vec8 into one byte
# NOTE(review): this span comes from a rendered diff. It appears to interleave
# the removed and the added lines of mand8 without +/- markers, so the
# while/for/end nesting below does not balance and the text is not runnable as
# shown. The notes below mark which side each group seems to belong to --
# confirm against the real file before relying on them.
Base.@propagate_inbounds function mand8(cr, ci)
# presumably removed side: one assignment per state variable
Zr = zerov8
Zi = zerov8
Tr = zerov8
Ti = zerov8
t = zerov8
# presumably added side: the same initialisation as one chained assignment
Zr = Zi = Tr = Ti = t = zerov8
i = 0

# presumably removed side: counter-driven loop that delegates to step_mandel
while i<50
for _ in 1:5
Zr,Zi,Tr,Ti = step_mandel(Zr,Zi,Tr,Ti,cr,ci)
i += 1
# presumably added side: 10 rounds of 5 inlined update steps
for _=1:10
for _=1:5
Zi = 2.0 .* Zr .* Zi .+ ci
Zr = Tr .- Ti .+ cr
Tr = Zr .* Zr
Ti = Zi .* Zi
end
t = Tr .+ Ti
# return an all-zero byte as soon as every lane exceeds 4
# (Float32 literal presumably from the removed side, Float64 from the added side)
all(x-> x > 4f0, t) && (return 0x00)
all(x-> x > 4.0, t) && (return 0x00)
end

# start with all bits set and clear the bit of every lane that is not <= 4.0
byte = 0xff
# presumably removed side: one hand-written line per lane
t[1] <= 4.0 || (byte &= 0b01111111)
t[2] <= 4.0 || (byte &= 0b10111111)
t[3] <= 4.0 || (byte &= 0b11011111)
t[4] <= 4.0 || (byte &= 0b11101111)
t[5] <= 4.0 || (byte &= 0b11110111)
t[6] <= 4.0 || (byte &= 0b11111011)
t[7] <= 4.0 || (byte &= 0b11111101)
t[8] <= 4.0 || (byte &= 0b11111110)
# presumably added side: the same bit clearing driven by the masks tuple
for i=1:8
t[i] <= 4.0 || (byte &= masks[i])
end
return byte
end

# Compute one byte per group of 8 consecutive entries of xvals (starting at
# x = 1, 9, 17, ...) with mand8 and store it in rows at
# ((y-1)*N÷8 + (x-1)÷8) + 1.
# NOTE(review): this span comes from a rendered diff. It appears to interleave
# the removed and the added loop without +/- markers, so the blocks below do
# not balance as shown -- confirm against the real file.
function mandel_inner(rows, ci, y, N, xvals)
# presumably removed side: @simd loop whose body is wrapped in @inbounds begin ... end
@simd for x in 1:8:N
@inbounds begin
cr = ntuple(i-> xvals[x + (i - 1)], 8)
rows[((y-1)*N÷8+(x-1)÷8) + 1] = mand8(cr, ci)
end
# presumably added side: the same body under a single @inbounds for loop
@inbounds for x=1:8:N
cr = ntuple(i-> xvals[x + i - 1], 8)
rows[((y-1)*N÷8+(x-1)÷8) + 1] = mand8(cr, ci)
end
end

# Build the coordinate vectors, fill rows through mandel_inner (one call per
# y), then write the header "P4\n$n $n\n" followed by the bytes of rows.
# NOTE(review): this span comes from a rendered diff. It appears to interleave
# the removed version (no io argument, Float32 vectors, Threads.@threads,
# output to stdout) with the added version (io argument, Float64 vectors,
# @sync/Threads.@spawn, output to io) without +/- markers, so it is not
# runnable as shown -- confirm against the real file.
# presumably removed signature, followed by the presumably added one
function mandelbrot(n = 200)
function mandelbrot(io, n = 200)
inv_ = 2.0 / n
# presumably removed side: zero-filled Float32 vectors filled by a threaded loop
N = n
xvals = zeros(Float32, n)
yvals = zeros(Float32, n)
Threads.@threads for i in 0:(N-1)
@inbounds xvals[i + 1] = i * inv_ - 1.5
@inbounds yvals[i + 1] = i * inv_ - 1.0
# presumably added side: uninitialised Float64 vectors filled by a plain loop
xvals = Vector{Float64}(undef, n)
yvals = Vector{Float64}(undef, n)
@inbounds for i in 0:(n-1)
xvals[i + 1] = i * inv_ - 1.5
yvals[i + 1] = i * inv_ - 1.0
end
# presumably removed side: zeroed output buffer and a Threads.@threads row loop
rows = zeros(UInt8, n*N÷8)
Threads.@threads for y in 1:N

# presumably added side: uninitialised output buffer and a @sync row loop
rows = Vector{UInt8}(undef, n^2 ÷ 8)
@sync for y=1:n
@inbounds ci = yvals[y]
# presumably removed side: direct call inside the threaded loop
mandel_inner(rows, ci, y, N, xvals)
# This allows dynamic scheduling instead of static scheduling
# of Threads.@threads macro. See
# https://github.com/JuliaLang/julia/issues/21017 . On some
# computers this is faster, on others not.
Threads.@spawn mandel_inner(rows, ci, y, n, xvals)
end
# header and data writes: the stdout pair is presumably the removed side, the io pair the added side
write(stdout, "P4\n$n $n\n")
write(stdout, rows)
write(io, "P4\n$n $n\n")
write(io, rows)
end

# Script entry: parse the first command-line argument as an Int and render.
# NOTE(review): rendered diff -- the first line is presumably the removed call
# (one-argument form); the second is presumably the added call, which passes
# stdout and is skipped when isinteractive() is true. Confirm against the real file.
mandelbrot(parse(Int, ARGS[1]))
isinteractive() || mandelbrot(stdout, parse(Int, ARGS[1]))
63 changes: 63 additions & 0 deletions mandelbrot/mandelbrot-fast.v2.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#=
The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

direct transliteration of the swift#3 program by Ralph Ganszky and Daniel Muellenborn:
https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/mandelbrot-swift-3.html

modified for Julia 1.0 by Simon Danisch.
tweaked for performance by https://github.com/maltezfaria and Adam Beckmeyer.
=#
# Eight Float64 zeros: the initial value of every 8-lane state tuple in `mand8`.
const zerov8 = ntuple(x-> 0.0, 8)
# masks[i] has every bit set except the one for lane i; lane 1 is the most
# significant bit and lane 8 the least significant.
const masks = (0b01111111, 0b10111111, 0b11011111, 0b11101111, 0b11110111,
               0b11111011, 0b11111101, 0b11111110)

# Calculate mandelbrot set for one Vec8 into one byte.
#
# `cr` is an 8-tuple of real parts (one per lane) and `ci` is the imaginary
# part, broadcast against the lanes (callers in this file pass a scalar).
# Returns a UInt8 in which bit i (counted from the most significant bit) is
# still set when lane i has `Zr^2 + Zi^2 <= 4.0` after 50 iterations.
Base.@propagate_inbounds function mand8(cr, ci)
    Zr = Zi = Tr = Ti = t = zerov8

    # 10 rounds of 5 iterations each (50 in total); the escape test runs only
    # once per round, not after every iteration.
    for _ in 1:10
        for _ in 1:5
            Zi = 2.0 .* Zr .* Zi .+ ci
            Zr = Tr .- Ti .+ cr
            Tr = Zr .* Zr
            Ti = Zi .* Zi
        end
        t = Tr .+ Ti
        # Every lane has escaped: nothing left to compute for this byte.
        all(x -> x > 4.0, t) && return 0x00
    end

    # Clear the bit of each lane that is not <= 4.0. A lane whose value became
    # NaN also fails the comparison and is therefore cleared as well.
    byte = 0xff
    for i in 1:8
        t[i] <= 4.0 || (byte &= masks[i])
    end
    return byte
end

# Fill the bytes of image row `y` in `rows`.
#
# Each byte is produced by `mand8` from 8 consecutive entries of `xvals`
# (starting at index 1, 9, 17, ...) together with the row's `ci`. The bytes of
# row `y` start right after offset `(y - 1) * N ÷ 8` in `rows`. All indexing
# is done under `@inbounds`, i.e. without bounds checks.
function mandel_inner(rows, ci, y, N, xvals)
    row_offset = (y - 1) * N ÷ 8
    @inbounds for k in 0:(cld(N, 8) - 1)
        first_x = 8k
        cr = ntuple(lane -> xvals[first_x + lane], 8)
        rows[row_offset + k + 1] = mand8(cr, ci)
    end
    return nothing
end

# Render an `n` x `n` Mandelbrot bitmap and write it to `io`.
#
# The output is the header "P4\n$n $n\n" (binary PBM) followed by the packed
# bytes, 8 pixels per byte. Real parts are sampled from -1.5 and imaginary
# parts from -1.0, both in steps of `2.0 / n`. Rows are computed in parallel
# with `Threads.@threads`. Returns the value of the final `write(io, rows)`.
#
# Throws `ArgumentError` when `n` is not a multiple of 8.
function mandelbrot(io, n = 200)
    # mandel_inner consumes 8 x-values per byte with unchecked indexing and the
    # buffer below holds n^2 ÷ 8 bytes, so any other width would read and write
    # out of bounds. Reject it here instead of corrupting memory.
    n % 8 == 0 || throw(ArgumentError("n must be a multiple of 8, got $n"))

    inv_ = 2.0 / n
    xvals = Vector{Float64}(undef, n)
    yvals = Vector{Float64}(undef, n)
    @inbounds for i in 0:(n - 1)
        xvals[i + 1] = i * inv_ - 1.5
        yvals[i + 1] = i * inv_ - 1.0
    end

    # Left uninitialised on purpose: each row's loop writes its own bytes.
    rows = Vector{UInt8}(undef, n^2 ÷ 8)
    Threads.@threads for y in 1:n
        @inbounds ci = yvals[y]
        mandel_inner(rows, ci, y, n, xvals)
    end
    write(io, "P4\n$n $n\n")
    return write(io, rows)
end

# Script entry point: outside an interactive session, render to stdout using
# the first command-line argument as the image size.
if !isinteractive()
    mandelbrot(stdout, parse(Int, ARGS[1]))
end
64 changes: 64 additions & 0 deletions mandelbrot/mandelbrot-fast.v3.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#=
The Computer Language Benchmarks Game
https://salsa.debian.org/benchmarksgame-team/benchmarksgame/

direct transliteration of the swift#3 program by Ralph Ganszky and Daniel Muellenborn:
https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/mandelbrot-swift-3.html

modified for Julia 1.0 by Simon Danisch.
tweaked for performance by https://github.com/maltezfaria and Adam Beckmeyer.
=#
using KissThreading

# Eight Float64 zeros: the starting value for each 8-lane state tuple in `mand8`.
const zerov8 = ntuple(x-> 0.0, 8)
# masks[i] keeps every bit except the one belonging to lane i (lane 1 is the
# most significant bit, lane 8 the least significant).
const masks = (0b01111111, 0b10111111, 0b11011111, 0b11101111, 0b11110111,
               0b11111011, 0b11111101, 0b11111110)

# Calculate mandelbrot set for one Vec8 into one byte.
#
# `cr` is an 8-tuple of real parts, `ci` the imaginary part broadcast against
# them (callers in this file pass a scalar). The result is a UInt8 whose bit
# i, counted from the most significant bit, remains set when lane i still
# satisfies `Zr^2 + Zi^2 <= 4.0` after 50 iterations.
Base.@propagate_inbounds function mand8(cr, ci)
    Zr = Zi = Tr = Ti = t = zerov8

    # 50 iterations in 10 rounds of 5; the escape test is only evaluated
    # between rounds.
    for _ in 1:10
        for _ in 1:5
            Zi = 2.0 .* Zr .* Zi .+ ci
            Zr = Tr .- Ti .+ cr
            Tr = Zr .* Zr
            Ti = Zi .* Zi
        end
        t = Tr .+ Ti
        # Stop early once all eight lanes have escaped.
        all(x -> x > 4.0, t) && return 0x00
    end

    # Drop the bit of every lane that is not <= 4.0; a NaN lane fails the
    # comparison too, so it is dropped as well.
    byte = 0xff
    for i in 1:8
        t[i] <= 4.0 || (byte &= masks[i])
    end
    return byte
end

# Compute the packed bytes of image row `y` and store them in `rows`.
#
# The x-axis is walked in groups of 8 entries of `xvals` (starting at 1, 9,
# 17, ...); each group is handed to `mand8` along with `ci`, and the resulting
# byte is stored in the group's slot after offset `(y - 1) * N ÷ 8`. Indexing
# is unchecked (`@inbounds`).
function mandel_inner(rows, ci, y, N, xvals)
    offset = (y - 1) * N ÷ 8
    @inbounds for (slot, x) in enumerate(1:8:N)
        lanes = ntuple(k -> xvals[x + k - 1], 8)
        rows[offset + slot] = mand8(lanes, ci)
    end
    return nothing
end

# Render an `n` x `n` Mandelbrot bitmap and write it to `io`.
#
# The output is the header "P4\n$n $n\n" (binary PBM) followed by the packed
# bytes, 8 pixels per byte. Real parts are sampled from -1.5 and imaginary
# parts from -1.0, both in steps of `2.0 / n`. Rows are dispatched through
# KissThreading's `tmap!` with `batch_size=8`. Returns the value of the final
# `write(io, rows)`.
#
# Throws `ArgumentError` when `n` is not a multiple of 8.
function mandelbrot(io, n = 200)
    # mandel_inner consumes 8 x-values per byte with unchecked indexing and the
    # buffer below holds n^2 ÷ 8 bytes, so any other width would read and write
    # out of bounds. Reject it here instead of corrupting memory.
    n % 8 == 0 || throw(ArgumentError("n must be a multiple of 8, got $n"))

    inv_ = 2.0 / n
    xvals = Vector{Float64}(undef, n)
    yvals = Vector{Float64}(undef, n)
    @inbounds for i in 0:(n - 1)
        xvals[i + 1] = i * inv_ - 1.5
        yvals[i + 1] = i * inv_ - 1.0
    end

    # Left uninitialised on purpose: each row task writes its own bytes.
    rows = Vector{UInt8}(undef, n^2 ÷ 8)
    f(y) = @inbounds mandel_inner(rows, yvals[y], y, n, xvals)
    # tmap! needs a destination; mandel_inner returns nothing, so the
    # destination is a throwaway Vector{Nothing}.
    tmap!(f, Vector{Nothing}(undef, n), collect(1:n); batch_size=8)

    write(io, "P4\n$n $n\n")
    return write(io, rows)
end

# Script entry point: when not running interactively, take the image size from
# the first command-line argument and write the bitmap to stdout.
if !isinteractive()
    mandelbrot(stdout, parse(Int, ARGS[1]))
end