Skip to content

Commit 6958e62

Browse files
committed
post-process rocprof results
rocPROF generates one trace for every process. Simply combine them together into a single trace for ease of use. Also remove the individual traces, as they are no longer useful afterwards.
1 parent 0d685b5 commit 6958e62

File tree

1 file changed

+20
-3
lines changed

1 file changed

+20
-3
lines changed

src/libkernelbot/run_eval.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import json
55
import os
66
import shlex
7+
import shutil
78
import subprocess
89
import tempfile
910
import time
@@ -307,11 +308,13 @@ def profile_program(
307308
# The runner-specific configuration should implement logic
308309
# to fetch the data in this directory and return it as
309310
# ProfileResult.download_url.
310-
output_dir = Path('profile_data')
311+
# Insert an extra nested path here so that the resulting zip has all files
312+
# in the profile_data/ directory rather than directly in the root.
313+
output_dir = Path(".") / "profile_data" / "profile_data"
314+
output_dir.mkdir(parents=True, exist_ok=True)
311315

312316
if system.runtime == "ROCm":
313317
# Wrap program in rocprof
314-
output_dir.mkdir()
315318
call = [
316319
"rocprofv3",
317320
"--log-level",
@@ -337,14 +340,28 @@ def profile_program(
337340
"-o",
338341
# Insert an extra path here so that the resulting zip has all files
339342
# in the profile_data/ directory rather than the root.
340-
"profile_data/%pid%",
343+
"%pid%",
341344
"--",
342345
] + call
343346

344347
run_result = run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu)
345348
profile_result = None
346349

347350
if run_result.success:
351+
# Post-process trace data.
352+
# rocPROF generates one trace for every process, but it's more useful to
353+
# have all traces be in the same file. Fortunately we can do that by
354+
# concatenating.
355+
traces = list(output_dir.glob("*.pftrace"))
356+
with (output_dir / "combined.pftrace").open("wb") as combined:
357+
for trace_path in traces:
358+
with trace_path.open("rb") as trace:
359+
shutil.copyfileobj(trace, combined)
360+
361+
# After we've created the combined trace, there is no point in
362+
# keeping the individual traces around.
363+
trace_path.unlink()
364+
348365
profile_result = ProfileResult(
349366
profiler='rocPROF',
350367
download_url=None,

0 commit comments

Comments
 (0)