4
4
import json
5
5
import os
6
6
import shlex
7
+ import shutil
7
8
import subprocess
8
9
import tempfile
9
10
import time
@@ -307,11 +308,13 @@ def profile_program(
307
308
# The runner-specific configuration should implement logic
308
309
# to fetch the data in this directory and return it as
309
310
# ProfileResult.download_url.
310
- output_dir = Path ('profile_data' )
311
+ # Insert an extra nested path here so that the resulting zip has all files
312
+ # in the profile_data/ directory rather than directly in the root.
313
+ output_dir = Path ("." ) / "profile_data" / "profile_data"
314
+ output_dir .mkdir (parents = True , exist_ok = True )
311
315
312
316
if system .runtime == "ROCm" :
313
317
# Wrap program in rocprof
314
- output_dir .mkdir ()
315
318
call = [
316
319
"rocprofv3" ,
317
320
"--log-level" ,
@@ -337,14 +340,28 @@ def profile_program(
337
340
"-o" ,
338
341
# Insert an extra path here so that the resulting zip has all files
339
342
# in the profile_data/ directory rather than the root.
340
- "profile_data/ %pid%" ,
343
+ "%pid%" ,
341
344
"--" ,
342
345
] + call
343
346
344
347
run_result = run_program (call , seed = seed , timeout = timeout , multi_gpu = multi_gpu )
345
348
profile_result = None
346
349
347
350
if run_result .success :
351
+ # Post-process trace data.
352
+ # rocPROF generates one trace for every process, but it's more useful to
353
+ # have all traces be in the same file. Fortunately we can do that by
354
+ # concatenating.
355
+ traces = list (output_dir .glob ("*.pftrace" ))
356
+ with (output_dir / "combined.pftrace" ).open ("wb" ) as combined :
357
+ for trace_path in traces :
358
+ with trace_path .open ("rb" ) as trace :
359
+ shutil .copyfileobj (trace , combined )
360
+
361
+ # After we've created the combined trace, there is no point in
362
+ # keeping the individual traces around.
363
+ trace_path .unlink ()
364
+
348
365
profile_result = ProfileResult (
349
366
profiler = 'rocPROF' ,
350
367
download_url = None ,
0 commit comments