11using ImageFiltering, FFTW, LinearAlgebra, Profile, Random
2- # using ProfileView
32using ComputationalResources
43
54FFTW. set_num_threads (parse (Int, get (ENV , " FFTW_NUM_THREADS" , " 1" )))
65BLAS. set_num_threads (parse (Int, get (ENV , " BLAS_NUM_THREADS" , string (Threads. nthreads () ÷ 2 ))))
76
8- function benchmark (mats)
7+ function benchmark_new (mats)
98 kernel = ImageFiltering. factorkernel (Kernel. LoG (1 ))
109 Threads. @threads for mat in mats
1110 frame_filtered = deepcopy (mat[:, :, 1 ])
@@ -17,6 +16,18 @@ function benchmark(mats)
1716 return
1817 end
1918end
"""
    benchmark_old(mats)

For every array `mat` in `mats` (one loop iteration per array, spread over
threads with `Threads.@threads`), filter each slice `mat[:, :, i]` along the
third dimension with the factored `Kernel.LoG(1)` kernel, using a
`CPU1(Algorithm.FFT())` resource. The filtered output is written into a single
buffer per `mat` that is overwritten on every slice; nothing is kept.

Returns `nothing`.

NOTE(review): the name and the `r_noncached` local suggest this is the
non-cached-plan baseline to compare against `benchmark_new` — confirm.
"""
function benchmark_old(mats)
    kernel = ImageFiltering.factorkernel(Kernel.LoG(1))
    Threads.@threads for mat in mats
        # Indexing with `mat[:, :, 1]` already allocates a fresh array, so it
        # can serve directly as the output buffer without an extra `deepcopy`.
        frame_filtered = mat[:, :, 1]
        r_noncached = CPU1(Algorithm.FFT())
        for i in axes(mat, 3)
            frame = @view mat[:, :, i]
            imfilter!(r_noncached, frame_filtered, frame, kernel)
        end
        # Deliberately no `return` here: `@threads` runs this body inside a
        # per-task closure that loops over the task's share of `mats`, so a
        # `return` would exit that closure and skip the task's remaining
        # arrays whenever there are fewer threads than arrays.
    end
    return nothing
end
2031
2132function test (mats)
2233 kernel = ImageFiltering. factorkernel (Kernel. LoG (1 ))
@@ -26,54 +37,31 @@ function test(mats)
2637 f2 = deepcopy (mat[:, :, 1 ])
2738 r_noncached = CPU1 (Algorithm. FFT ())
2839 for i in axes (mat, 3 )
29- frame = @view mat[:, :, i]
30- @info " imfilter! noncached"
31- imfilter! (r_noncached, f2, frame, kernel)
32- @info " imfilter! cached"
33- imfilter! (r_cached, f1, frame, kernel)
40+ imfilter! (r_noncached, f2, deepcopy (mat[:, :, i]), kernel)
41+ imfilter! (r_cached, f1, deepcopy (mat[:, :, i]), kernel)
3442 @show f1[1 : 4 ] f2[1 : 4 ]
3543 f1 ≈ f2 || error (" f1 !≈ f2" )
3644 end
3745 return
3846 end
3947end
4048
41- function profile ()
49+ function run ()
4250 Random. seed! (1 )
4351 nmats = 10
44- mats = [rand (Float32, rand (80 : 100 ), rand (80 : 100 ), rand (2000 : 3000 )) for _ in 1 : nmats]
45- GC. gc (true )
52+ mats = [rand (Float64, rand (80 : 100 ), rand (80 : 100 ), rand (2000 : 3000 )) for _ in 1 : nmats]
4653
47- # benchmark(mats)
54+ benchmark_new (mats)
55+ for _ in 1 : 3
56+ @time " warm run of benchmark_new(mats)" benchmark_new (mats)
57+ end
4858
49- # for _ in 1:3
50- # @time "warm run of benchmark(mats)" benchmark(mats)
51- # end
59+ benchmark_old (mats)
60+ for _ in 1 : 3
61+ @time " warm run of benchmark_old(mats)" benchmark_old (mats)
62+ end
5263
5364 test (mats)
54-
55- # Profile.clear()
56- # @profile benchmark(mats)
57-
58- # Profile.print(IOContext(stdout, :displaysize => (24, 200)); C=true, combine=true, mincount=100)
59- # # ProfileView.view()
60- # GC.gc(true)
6165end
6266
63- profile ()
64-
65- using ImageFiltering
66- using ImageFiltering. RFFT
67-
68- function mwe ()
69- a = rand (Float64, 10 , 10 )
70- out1 = rfft (a)
71-
72- buf = RFFT. RCpair {Float64} (undef, size (a))
73- rfft_plan = RFFT. plan_rfft! (buf)
74- copy! (buf, a)
75- out2 = complex (rfft_plan (buf))
76-
77- return out1 ≈ out2
78- end
79- mwe ()
67+ run ()
0 commit comments