-
Notifications
You must be signed in to change notification settings - Fork 251
Switch to texthighlighter to unbreak profile #2891
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Conversation
Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.
diff --git a/src/profile.jl b/src/profile.jl
index a276fff90..a82ba7c66 100644
--- a/src/profile.jl
+++ b/src/profile.jl
@@ -660,15 +660,21 @@ function Base.show(io::IO, results::ProfileResults)
p75 = quantile(relevant_times, 0.75)
p95 = quantile(relevant_times, 0.95)
- highlight_p95 = TextHighlighter((data, i, j) -> (names(data)[j] == "time") &&
- (data[i,j] >= p95),
- crayon"red")
- highlight_p75 = TextHighlighter((data, i, j) -> (names(data)[j] == "time") &&
- (data[i,j] >= p75),
- crayon"yellow")
- highlight_bold = TextHighlighter((data, i, j) -> (names(data)[j] == "name") &&
- (data[!, :time][i] >= p75),
- crayon"bold")
+ highlight_p95 = TextHighlighter(
+ (data, i, j) -> (names(data)[j] == "time") &&
+ (data[i, j] >= p95),
+ crayon"red"
+ )
+ highlight_p75 = TextHighlighter(
+ (data, i, j) -> (names(data)[j] == "time") &&
+ (data[i, j] >= p75),
+ crayon"yellow"
+ )
+ highlight_bold = TextHighlighter(
+ (data, i, j) -> (names(data)[j] == "name") &&
+ (data[!, :time][i] >= p75),
+ crayon"bold"
+ )
(highlight_p95, highlight_p75, highlight_bold)
end
@@ -829,9 +835,9 @@ function Base.show(io::IO, results::ProfileResults)
end
end
highlighters = time_highlighters(df)
- highlighters = isempty(highlighters) ? PrettyTables.TextHighlighter[] : collect(highlighters)
- pretty_table(io, df; column_labels=header, alignment, formatters=[formatters], highlighters)#,
- #body_hlines=trace_divisions)
+ highlighters = isempty(highlighters) ? PrettyTables.TextHighlighter[] : collect(highlighters)
+ pretty_table(io, df; column_labels = header, alignment, formatters = [formatters], highlighters) #,
+ #body_hlines=trace_divisions)
else
df = summarize_trace(df)
@@ -845,7 +851,7 @@ function Base.show(io::IO, results::ProfileResults)
header = [summary_column_names[name] for name in names(df)]
alignment = [name in ["name", "time_dist"] ? :l : :r for name in names(df)]
highlighters = time_highlighters(df)
- pretty_table(io, df; column_labels=header, alignment, formatters=[summary_formatter(df)], highlighters=collect(highlighters))
+ pretty_table(io, df; column_labels = header, alignment, formatters = [summary_formatter(df)], highlighters = collect(highlighters))
end
end
@@ -930,8 +936,8 @@ function Base.show(io::IO, results::ProfileResults)
end
end
highlighters = time_highlighters(df)
- pretty_table(io, df; column_labels=header, alignment, formatters=[formatters], highlighters=collect(highlighters),)
- #body_hlines=trace_divisions)
+ pretty_table(io, df; column_labels = header, alignment, formatters = [formatters], highlighters = collect(highlighters))
+ #body_hlines=trace_divisions)
else
df = summarize_trace(results.device)
@@ -945,7 +951,7 @@ function Base.show(io::IO, results::ProfileResults)
header = [summary_column_names[name] for name in names(df)]
alignment = [name in ["name", "time_dist"] ? :l : :r for name in names(df)]
highlighters = time_highlighters(df)
- pretty_table(io, df; column_labels=header, alignment, formatters=[summary_formatter(df)], highlighters=collect(highlighters))
+ pretty_table(io, df; column_labels = header, alignment, formatters = [summary_formatter(df)], highlighters = collect(highlighters))
end
end
@@ -982,7 +988,7 @@ function Base.show(io::IO, results::ProfileResults)
for color in unique(df.color)
if color !== nothing
ids = df[df.color .== color, :id]
- highlighter = TextHighlighter(Crayon(; foreground=color)) do data, i, j
+ highlighter = TextHighlighter(Crayon(; foreground = color)) do data, i, j
names(data)[j] in ["name", "domain"] && data[!, :id][i] in ids
end
push!(color_highlighters, highlighter)
@@ -1003,7 +1009,7 @@ function Base.show(io::IO, results::ProfileResults)
end
end
highlighters = tuple(color_highlighters..., time_highlighters(df)...)
- pretty_table(io, df; column_labels=header, alignment, formatters=[formatters], highlighters=collect(highlighters))
+ pretty_table(io, df; column_labels = header, alignment, formatters = [formatters], highlighters = collect(highlighters))
else
# merge the domain and name into a single column
nvtx_ranges.name = map(nvtx_ranges.name, nvtx_ranges.domain) do name, domain
@@ -1026,7 +1032,7 @@ function Base.show(io::IO, results::ProfileResults)
header = [summary_column_names[name] for name in names(df)]
alignment = [name in ["name", "time_dist"] ? :l : :r for name in names(df)]
highlighters = time_highlighters(df)
- pretty_table(io, df; column_labels=header, alignment, formatters=[summary_formatter(df)], highlighters=collect(highlighters))
+ pretty_table(io, df; column_labels = header, alignment, formatters = [summary_formatter(df)], highlighters = collect(highlighters))
end
end
diff --git a/test/core/profile.jl b/test/core/profile.jl
index 84b22e96c..f0dfd8e93 100644
--- a/test/core/profile.jl
+++ b/test/core/profile.jl
@@ -53,25 +53,25 @@ end
let
str = string(CUDA.@profile trace=true raw=true @cuda identity(nothing))
- @test_broken occursin("cuLaunchKernel", str)
- @test_broken occursin("identity()", str)
+ @test_broken occursin("cuLaunchKernel", str)
+ @test_broken occursin("identity()", str)
@test occursin("ID", str)
- @test_broken occursin("cuCtxSynchronize", str)
+ @test_broken occursin("cuCtxSynchronize", str)
end
# benchmarked profile
let
str = string(CUDA.@bprofile @cuda identity(nothing))
- @test_broken occursin("cuLaunchKernel", str)
- @test_broken occursin("identity()", str)
+ @test_broken occursin("cuLaunchKernel", str)
+ @test_broken occursin("identity()", str)
@test !occursin("cuCtxGetCurrent", str)
str = string(CUDA.@bprofile raw=true @cuda identity(nothing))
- @test_broken occursin("cuLaunchKernel", str)
- @test_broken occursin("identity()", str)
- @test_broken occursin("cuCtxGetCurrent", str)
+ @test_broken occursin("cuLaunchKernel", str)
+ @test_broken occursin("identity()", str)
+ @test_broken occursin("cuCtxGetCurrent", str)
end
if CUPTI.version() != v"13.0.0" # NVIDIA/NVTX#125 |
I've done #2892 first. Let's bump the compat in here then?
c022864
to
a83be6f
Compare
7a4a3ca
to
2a39abd
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CUDA.jl Benchmarks
Benchmark suite | Current: 1a584e5 | Previous: 6c6977f | Ratio |
---|---|---|---|
latency/precompile |
57159963125.5 ns |
43775122238.5 ns |
1.31 |
latency/ttfp |
7935287212.5 ns |
7276778491 ns |
1.09 |
latency/import |
4442786891 ns |
3836255124 ns |
1.16 |
integration/volumerhs |
9625884 ns |
9623790.5 ns |
1.00 |
integration/byval/slices=1 |
147010 ns |
146826 ns |
1.00 |
integration/byval/slices=3 |
425844 ns |
426011 ns |
1.00 |
integration/byval/reference |
145015 ns |
145073 ns |
1.00 |
integration/byval/slices=2 |
286522 ns |
286240 ns |
1.00 |
integration/cudadevrt |
103581 ns |
103460 ns |
1.00 |
kernel/indexing |
14168 ns |
14196 ns |
1.00 |
kernel/indexing_checked |
14990 ns |
15033 ns |
1.00 |
kernel/occupancy |
688.9271523178808 ns |
670.506329113924 ns |
1.03 |
kernel/launch |
2204 ns |
2162.5555555555557 ns |
1.02 |
kernel/rand |
14862 ns |
16555 ns |
0.90 |
array/reverse/1d |
19807 ns |
19646 ns |
1.01 |
array/reverse/2dL_inplace |
66861 ns |
66804 ns |
1.00 |
array/reverse/1dL |
69952 ns |
69923 ns |
1.00 |
array/reverse/2d |
21989.5 ns |
21677 ns |
1.01 |
array/reverse/1d_inplace |
9652 ns |
9658 ns |
1.00 |
array/reverse/2d_inplace |
13355 ns |
13323 ns |
1.00 |
array/reverse/2dL |
73988.5 ns |
73803 ns |
1.00 |
array/reverse/1dL_inplace |
66823 ns |
66812 ns |
1.00 |
array/copy |
20216 ns |
20306 ns |
1.00 |
array/iteration/findall/int |
156845.5 ns |
157164 ns |
1.00 |
array/iteration/findall/bool |
139096 ns |
139633 ns |
1.00 |
array/iteration/findfirst/int |
160778.5 ns |
160554.5 ns |
1.00 |
array/iteration/findfirst/bool |
161828 ns |
160957 ns |
1.01 |
array/iteration/scalar |
71596 ns |
72124 ns |
0.99 |
array/iteration/logical |
214030 ns |
215036.5 ns |
1.00 |
array/iteration/findmin/1d |
49953 ns |
49445 ns |
1.01 |
array/iteration/findmin/2d |
96065 ns |
96493.5 ns |
1.00 |
array/reductions/reduce/Int64/1d |
42860 ns |
42960 ns |
1.00 |
array/reductions/reduce/Int64/dims=1 |
46518 ns |
44742.5 ns |
1.04 |
array/reductions/reduce/Int64/dims=2 |
61329 ns |
61453 ns |
1.00 |
array/reductions/reduce/Int64/dims=1L |
89026 ns |
88951 ns |
1.00 |
array/reductions/reduce/Int64/dims=2L |
87704 ns |
88014.5 ns |
1.00 |
array/reductions/reduce/Float32/1d |
36194 ns |
35769 ns |
1.01 |
array/reductions/reduce/Float32/dims=1 |
41670 ns |
51586 ns |
0.81 |
array/reductions/reduce/Float32/dims=2 |
59747 ns |
59511 ns |
1.00 |
array/reductions/reduce/Float32/dims=1L |
52356 ns |
52474 ns |
1.00 |
array/reductions/reduce/Float32/dims=2L |
71675 ns |
71419 ns |
1.00 |
array/reductions/mapreduce/Int64/1d |
42980 ns |
43189 ns |
1.00 |
array/reductions/mapreduce/Int64/dims=1 |
53350 ns |
46540.5 ns |
1.15 |
array/reductions/mapreduce/Int64/dims=2 |
61617 ns |
61279.5 ns |
1.01 |
array/reductions/mapreduce/Int64/dims=1L |
88923 ns |
88854 ns |
1.00 |
array/reductions/mapreduce/Int64/dims=2L |
88079 ns |
88014 ns |
1.00 |
array/reductions/mapreduce/Float32/1d |
35901 ns |
36287 ns |
0.99 |
array/reductions/mapreduce/Float32/dims=1 |
41785 ns |
41466 ns |
1.01 |
array/reductions/mapreduce/Float32/dims=2 |
60179 ns |
59744 ns |
1.01 |
array/reductions/mapreduce/Float32/dims=1L |
52687 ns |
52550 ns |
1.00 |
array/reductions/mapreduce/Float32/dims=2L |
72062.5 ns |
71985 ns |
1.00 |
array/broadcast |
20288 ns |
20047 ns |
1.01 |
array/copyto!/gpu_to_gpu |
12774 ns |
11191 ns |
1.14 |
array/copyto!/cpu_to_gpu |
214743 ns |
213964 ns |
1.00 |
array/copyto!/gpu_to_cpu |
284572 ns |
284661.5 ns |
1.00 |
array/accumulate/Int64/1d |
125238 ns |
124888 ns |
1.00 |
array/accumulate/Int64/dims=1 |
83053 ns |
83130 ns |
1.00 |
array/accumulate/Int64/dims=2 |
157577 ns |
157680 ns |
1.00 |
array/accumulate/Int64/dims=1L |
1708697 ns |
1709578 ns |
1.00 |
array/accumulate/Int64/dims=2L |
965843 ns |
966045 ns |
1.00 |
array/accumulate/Float32/1d |
109037 ns |
108910 ns |
1.00 |
array/accumulate/Float32/dims=1 |
80484 ns |
80564 ns |
1.00 |
array/accumulate/Float32/dims=2 |
147354 ns |
147715 ns |
1.00 |
array/accumulate/Float32/dims=1L |
1618546 ns |
1618612 ns |
1.00 |
array/accumulate/Float32/dims=2L |
698010 ns |
698318 ns |
1.00 |
array/construct |
1261.4 ns |
1287.5 ns |
0.98 |
array/random/randn/Float32 |
44046 ns |
43976 ns |
1.00 |
array/random/randn!/Float32 |
24739 ns |
24816 ns |
1.00 |
array/random/rand!/Int64 |
27457 ns |
27267 ns |
1.01 |
array/random/rand!/Float32 |
8699 ns |
8653.333333333334 ns |
1.01 |
array/random/rand/Int64 |
31362 ns |
38285 ns |
0.82 |
array/random/rand/Float32 |
12953 ns |
13026 ns |
0.99 |
array/permutedims/4d |
59865 ns |
60152.5 ns |
1.00 |
array/permutedims/2d |
54251 ns |
53934 ns |
1.01 |
array/permutedims/3d |
54850 ns |
54649.5 ns |
1.00 |
array/sorting/1d |
2755637 ns |
2757180 ns |
1.00 |
array/sorting/by |
3367795.5 ns |
3343619 ns |
1.01 |
array/sorting/2d |
1084536 ns |
1080730 ns |
1.00 |
cuda/synchronization/stream/auto |
1015.6 ns |
1029.6 ns |
0.99 |
cuda/synchronization/stream/nonblocking |
8508.7 ns |
7270.700000000001 ns |
1.17 |
cuda/synchronization/stream/blocking |
815.3333333333334 ns |
850.7415730337078 ns |
0.96 |
cuda/synchronization/context/auto |
1156.9 ns |
1157 ns |
1.00 |
cuda/synchronization/context/nonblocking |
7177.700000000001 ns |
6964 ns |
1.03 |
cuda/synchronization/context/blocking |
900.1730769230769 ns |
888.5208333333334 ns |
1.01 |
This comment was automatically generated by workflow using github-action-benchmark.
@test_broken occursin("cuLaunchKernel", str) | ||
@test_broken occursin("identity()", str) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was marking things as broken to figure out what just doesn't work with the new PrettyTables 3.0. Let me mark this PR as a draft.
cc @ronisbr, could you help us out transitioning to |
Hi @kshyatt! Yes, sure! I can submit a PR to adapt the old API to the new one. I am in the middle of a trip outside my country, but I will do this in my spare time.
Hopefully this works