Skip to content

Conversation

kshyatt
Copy link
Member

@kshyatt kshyatt commented Sep 11, 2025

Hopefully this works

@kshyatt kshyatt requested a review from maleadt September 11, 2025 10:21
Copy link
Contributor

github-actions bot commented Sep 11, 2025

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic master) to apply these changes.

Click here to view the suggested changes.
diff --git a/src/profile.jl b/src/profile.jl
index a276fff90..a82ba7c66 100644
--- a/src/profile.jl
+++ b/src/profile.jl
@@ -660,15 +660,21 @@ function Base.show(io::IO, results::ProfileResults)
         p75 = quantile(relevant_times, 0.75)
         p95 = quantile(relevant_times, 0.95)
 
-        highlight_p95 = TextHighlighter((data, i, j) -> (names(data)[j] == "time") &&
-                                                        (data[i,j] >= p95),
-                                        crayon"red")
-        highlight_p75 = TextHighlighter((data, i, j) -> (names(data)[j] == "time") &&
-                                                        (data[i,j] >= p75),
-                                        crayon"yellow")
-        highlight_bold = TextHighlighter((data, i, j) -> (names(data)[j] == "name") &&
-                                                         (data[!, :time][i] >= p75),
-                                         crayon"bold")
+            highlight_p95 = TextHighlighter(
+                (data, i, j) -> (names(data)[j] == "time") &&
+                    (data[i, j] >= p95),
+                crayon"red"
+            )
+            highlight_p75 = TextHighlighter(
+                (data, i, j) -> (names(data)[j] == "time") &&
+                    (data[i, j] >= p75),
+                crayon"yellow"
+            )
+            highlight_bold = TextHighlighter(
+                (data, i, j) -> (names(data)[j] == "name") &&
+                    (data[!, :time][i] >= p75),
+                crayon"bold"
+            )
 
         (highlight_p95, highlight_p75, highlight_bold)
     end
@@ -829,9 +835,9 @@ function Base.show(io::IO, results::ProfileResults)
                 end
             end
             highlighters = time_highlighters(df)
-            highlighters = isempty(highlighters) ? PrettyTables.TextHighlighter[] : collect(highlighters)
-            pretty_table(io, df; column_labels=header, alignment, formatters=[formatters], highlighters)#,
-                                 #body_hlines=trace_divisions)
+                highlighters = isempty(highlighters) ? PrettyTables.TextHighlighter[] : collect(highlighters)
+                pretty_table(io, df; column_labels = header, alignment, formatters = [formatters], highlighters) #,
+                #body_hlines=trace_divisions)
         else
             df = summarize_trace(df)
 
@@ -845,7 +851,7 @@ function Base.show(io::IO, results::ProfileResults)
             header = [summary_column_names[name] for name in names(df)]
             alignment = [name in ["name", "time_dist"] ? :l : :r for name in names(df)]
             highlighters = time_highlighters(df)
-            pretty_table(io, df; column_labels=header, alignment, formatters=[summary_formatter(df)], highlighters=collect(highlighters))
+                pretty_table(io, df; column_labels = header, alignment, formatters = [summary_formatter(df)], highlighters = collect(highlighters))
         end
     end
 
@@ -930,8 +936,8 @@ function Base.show(io::IO, results::ProfileResults)
                 end
             end
             highlighters = time_highlighters(df)
-            pretty_table(io, df; column_labels=header, alignment, formatters=[formatters], highlighters=collect(highlighters),)
-                                 #body_hlines=trace_divisions)
+                pretty_table(io, df; column_labels = header, alignment, formatters = [formatters], highlighters = collect(highlighters))
+                #body_hlines=trace_divisions)
         else
             df = summarize_trace(results.device)
 
@@ -945,7 +951,7 @@ function Base.show(io::IO, results::ProfileResults)
             header = [summary_column_names[name] for name in names(df)]
             alignment = [name in ["name", "time_dist"] ? :l : :r for name in names(df)]
             highlighters = time_highlighters(df)
-            pretty_table(io, df; column_labels=header, alignment, formatters=[summary_formatter(df)], highlighters=collect(highlighters))
+                pretty_table(io, df; column_labels = header, alignment, formatters = [summary_formatter(df)], highlighters = collect(highlighters))
         end
     end
 
@@ -982,7 +988,7 @@ function Base.show(io::IO, results::ProfileResults)
             for color in unique(df.color)
                 if color !== nothing
                     ids = df[df.color .== color, :id]
-                    highlighter = TextHighlighter(Crayon(; foreground=color)) do data, i, j
+                        highlighter = TextHighlighter(Crayon(; foreground = color)) do data, i, j
                         names(data)[j] in ["name", "domain"] && data[!, :id][i] in ids
                     end
                     push!(color_highlighters, highlighter)
@@ -1003,7 +1009,7 @@ function Base.show(io::IO, results::ProfileResults)
                 end
             end
             highlighters = tuple(color_highlighters..., time_highlighters(df)...)
-            pretty_table(io, df; column_labels=header, alignment, formatters=[formatters], highlighters=collect(highlighters))
+                pretty_table(io, df; column_labels = header, alignment, formatters = [formatters], highlighters = collect(highlighters))
         else
             # merge the domain and name into a single column
             nvtx_ranges.name = map(nvtx_ranges.name, nvtx_ranges.domain) do name, domain
@@ -1026,7 +1032,7 @@ function Base.show(io::IO, results::ProfileResults)
             header = [summary_column_names[name] for name in names(df)]
             alignment = [name in ["name", "time_dist"] ? :l : :r for name in names(df)]
             highlighters = time_highlighters(df)
-            pretty_table(io, df; column_labels=header, alignment, formatters=[summary_formatter(df)], highlighters=collect(highlighters))
+                pretty_table(io, df; column_labels = header, alignment, formatters = [summary_formatter(df)], highlighters = collect(highlighters))
         end
     end
 
diff --git a/test/core/profile.jl b/test/core/profile.jl
index 84b22e96c..f0dfd8e93 100644
--- a/test/core/profile.jl
+++ b/test/core/profile.jl
@@ -53,25 +53,25 @@ end
 let
     str = string(CUDA.@profile trace=true raw=true @cuda identity(nothing))
 
-    @test_broken occursin("cuLaunchKernel", str)
-    @test_broken occursin("identity()", str)
+                @test_broken occursin("cuLaunchKernel", str)
+                @test_broken occursin("identity()", str)
 
     @test occursin("ID", str)
 
-    @test_broken occursin("cuCtxSynchronize", str)
+                @test_broken occursin("cuCtxSynchronize", str)
 end
 
 # benchmarked profile
 let
     str = string(CUDA.@bprofile @cuda identity(nothing))
-    @test_broken occursin("cuLaunchKernel", str)
-    @test_broken occursin("identity()", str)
+                @test_broken occursin("cuLaunchKernel", str)
+                @test_broken occursin("identity()", str)
     @test !occursin("cuCtxGetCurrent", str)
 
     str = string(CUDA.@bprofile raw=true @cuda identity(nothing))
-    @test_broken occursin("cuLaunchKernel", str)
-    @test_broken occursin("identity()", str)
-    @test_broken occursin("cuCtxGetCurrent", str)
+                @test_broken occursin("cuLaunchKernel", str)
+                @test_broken occursin("identity()", str)
+                @test_broken occursin("cuCtxGetCurrent", str)
 end
 
 if CUPTI.version() != v"13.0.0" # NVIDIA/NVTX#125

@maleadt
Copy link
Member

maleadt commented Sep 11, 2025

I've done #2892 first. Let's bump the compat in here then?

Copy link
Contributor

@github-actions github-actions bot left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CUDA.jl Benchmarks

Benchmark suite Current: 1a584e5 Previous: 6c6977f Ratio
latency/precompile 57159963125.5 ns 43775122238.5 ns 1.31
latency/ttfp 7935287212.5 ns 7276778491 ns 1.09
latency/import 4442786891 ns 3836255124 ns 1.16
integration/volumerhs 9625884 ns 9623790.5 ns 1.00
integration/byval/slices=1 147010 ns 146826 ns 1.00
integration/byval/slices=3 425844 ns 426011 ns 1.00
integration/byval/reference 145015 ns 145073 ns 1.00
integration/byval/slices=2 286522 ns 286240 ns 1.00
integration/cudadevrt 103581 ns 103460 ns 1.00
kernel/indexing 14168 ns 14196 ns 1.00
kernel/indexing_checked 14990 ns 15033 ns 1.00
kernel/occupancy 688.9271523178808 ns 670.506329113924 ns 1.03
kernel/launch 2204 ns 2162.5555555555557 ns 1.02
kernel/rand 14862 ns 16555 ns 0.90
array/reverse/1d 19807 ns 19646 ns 1.01
array/reverse/2dL_inplace 66861 ns 66804 ns 1.00
array/reverse/1dL 69952 ns 69923 ns 1.00
array/reverse/2d 21989.5 ns 21677 ns 1.01
array/reverse/1d_inplace 9652 ns 9658 ns 1.00
array/reverse/2d_inplace 13355 ns 13323 ns 1.00
array/reverse/2dL 73988.5 ns 73803 ns 1.00
array/reverse/1dL_inplace 66823 ns 66812 ns 1.00
array/copy 20216 ns 20306 ns 1.00
array/iteration/findall/int 156845.5 ns 157164 ns 1.00
array/iteration/findall/bool 139096 ns 139633 ns 1.00
array/iteration/findfirst/int 160778.5 ns 160554.5 ns 1.00
array/iteration/findfirst/bool 161828 ns 160957 ns 1.01
array/iteration/scalar 71596 ns 72124 ns 0.99
array/iteration/logical 214030 ns 215036.5 ns 1.00
array/iteration/findmin/1d 49953 ns 49445 ns 1.01
array/iteration/findmin/2d 96065 ns 96493.5 ns 1.00
array/reductions/reduce/Int64/1d 42860 ns 42960 ns 1.00
array/reductions/reduce/Int64/dims=1 46518 ns 44742.5 ns 1.04
array/reductions/reduce/Int64/dims=2 61329 ns 61453 ns 1.00
array/reductions/reduce/Int64/dims=1L 89026 ns 88951 ns 1.00
array/reductions/reduce/Int64/dims=2L 87704 ns 88014.5 ns 1.00
array/reductions/reduce/Float32/1d 36194 ns 35769 ns 1.01
array/reductions/reduce/Float32/dims=1 41670 ns 51586 ns 0.81
array/reductions/reduce/Float32/dims=2 59747 ns 59511 ns 1.00
array/reductions/reduce/Float32/dims=1L 52356 ns 52474 ns 1.00
array/reductions/reduce/Float32/dims=2L 71675 ns 71419 ns 1.00
array/reductions/mapreduce/Int64/1d 42980 ns 43189 ns 1.00
array/reductions/mapreduce/Int64/dims=1 53350 ns 46540.5 ns 1.15
array/reductions/mapreduce/Int64/dims=2 61617 ns 61279.5 ns 1.01
array/reductions/mapreduce/Int64/dims=1L 88923 ns 88854 ns 1.00
array/reductions/mapreduce/Int64/dims=2L 88079 ns 88014 ns 1.00
array/reductions/mapreduce/Float32/1d 35901 ns 36287 ns 0.99
array/reductions/mapreduce/Float32/dims=1 41785 ns 41466 ns 1.01
array/reductions/mapreduce/Float32/dims=2 60179 ns 59744 ns 1.01
array/reductions/mapreduce/Float32/dims=1L 52687 ns 52550 ns 1.00
array/reductions/mapreduce/Float32/dims=2L 72062.5 ns 71985 ns 1.00
array/broadcast 20288 ns 20047 ns 1.01
array/copyto!/gpu_to_gpu 12774 ns 11191 ns 1.14
array/copyto!/cpu_to_gpu 214743 ns 213964 ns 1.00
array/copyto!/gpu_to_cpu 284572 ns 284661.5 ns 1.00
array/accumulate/Int64/1d 125238 ns 124888 ns 1.00
array/accumulate/Int64/dims=1 83053 ns 83130 ns 1.00
array/accumulate/Int64/dims=2 157577 ns 157680 ns 1.00
array/accumulate/Int64/dims=1L 1708697 ns 1709578 ns 1.00
array/accumulate/Int64/dims=2L 965843 ns 966045 ns 1.00
array/accumulate/Float32/1d 109037 ns 108910 ns 1.00
array/accumulate/Float32/dims=1 80484 ns 80564 ns 1.00
array/accumulate/Float32/dims=2 147354 ns 147715 ns 1.00
array/accumulate/Float32/dims=1L 1618546 ns 1618612 ns 1.00
array/accumulate/Float32/dims=2L 698010 ns 698318 ns 1.00
array/construct 1261.4 ns 1287.5 ns 0.98
array/random/randn/Float32 44046 ns 43976 ns 1.00
array/random/randn!/Float32 24739 ns 24816 ns 1.00
array/random/rand!/Int64 27457 ns 27267 ns 1.01
array/random/rand!/Float32 8699 ns 8653.333333333334 ns 1.01
array/random/rand/Int64 31362 ns 38285 ns 0.82
array/random/rand/Float32 12953 ns 13026 ns 0.99
array/permutedims/4d 59865 ns 60152.5 ns 1.00
array/permutedims/2d 54251 ns 53934 ns 1.01
array/permutedims/3d 54850 ns 54649.5 ns 1.00
array/sorting/1d 2755637 ns 2757180 ns 1.00
array/sorting/by 3367795.5 ns 3343619 ns 1.01
array/sorting/2d 1084536 ns 1080730 ns 1.00
cuda/synchronization/stream/auto 1015.6 ns 1029.6 ns 0.99
cuda/synchronization/stream/nonblocking 8508.7 ns 7270.700000000001 ns 1.17
cuda/synchronization/stream/blocking 815.3333333333334 ns 850.7415730337078 ns 0.96
cuda/synchronization/context/auto 1156.9 ns 1157 ns 1.00
cuda/synchronization/context/nonblocking 7177.700000000001 ns 6964 ns 1.03
cuda/synchronization/context/blocking 900.1730769230769 ns 888.5208333333334 ns 1.01

This comment was automatically generated by workflow using github-action-benchmark.

Comment on lines +56 to +57
@test_broken occursin("cuLaunchKernel", str)
@test_broken occursin("identity()", str)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was marking things as broken to figure out what doesn't work with the new PrettyTables (PT) 3.0. Let me mark this PR as a draft.

@kshyatt kshyatt marked this pull request as draft September 16, 2025 13:27
@kshyatt
Copy link
Member Author

kshyatt commented Sep 16, 2025

cc @ronisbr, could you help us out transitioning to PrettyTables.jl 3.0 here? There have been a bunch of API changes I'm unsure how to work with, so some expert advice would be very welcome :)

@ronisbr
Copy link

ronisbr commented Sep 16, 2025

Hi @kshyatt!

Yes, sure! I can submit a PR to adapt the old API to the new one. I am in the middle of a trip outside my country, but I will do this in my spare time.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants