Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions docs/documentation/gpuParallelization.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ Note: Ordering is not guaranteed or stable, so use key-value pairing when using

**Macro Invocation**

Uses FYPP eval directive using `#:call`
To parallelize a loop, place one macro call immediately before the loop and one immediately after it:

```C
#:call GPU_PARALLEL_LOOP(...)
$:GPU_PARALLEL_LOOP(...)
{code}
#:endcall GPU_PARALLEL_LOOP
$:END_GPU_PARALLEL_LOOP()
```

This wraps the lines in `code` with parallelization directives for OpenACC or OpenMP, depending on environment and compiler settings.

Comment on lines +47 to +56
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix GPU_PARALLEL_LOOP docs to match new API and syntax

  • The invocation snippet should use $:GPU_PARALLEL_LOOP(...) (single $:), not $:$GPU_PARALLEL_LOOP(...).
  • The parameter table and example still describe the old #:call GPU_PARALLEL_LOOP(code=..., ...) style and a required code parameter, but the new API uses start/end wrappers without a code argument.

Consider updating this section so the parameters and examples reflect the new block-style $:GPU_PARALLEL_LOOP(...)\n{code}\n$:END_GPU_PARALLEL_LOOP() usage.

Also applies to: 99-108

🤖 Prompt for AI Agents
In docs/documentation/gpuParallelization.md around lines 47 to 56 (and similarly
update lines 99 to 108), the example and parameter table document the old API
and use an incorrect invocation ($:$GPU_PARALLEL_LOOP); update the example to
use the correct single-prefix block style
$:GPU_PARALLEL_LOOP(...)\n{code}\n$:END_GPU_PARALLEL_LOOP(), remove any
references to a required code parameter or the #:call GPU_PARALLEL_LOOP(...)
form, and rewrite the parameter list and examples to describe the start/end
wrapper semantics (what parameters the start macro accepts and that code is
provided between the macro pairs). Ensure both the example snippet and the
parameter table reflect the new block-style API consistently.

**Parameters**

| name | data type | Default Value | description |
Expand Down
7 changes: 2 additions & 5 deletions src/common/include/acc_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@
$:end_acc_directive
#:enddef

#:def ACC_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', &
#:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
& no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None)
Expand All @@ -155,10 +155,7 @@
& deviceptr_val.strip('\n') + attach_val.strip('\n')
#:set acc_directive = '!$acc parallel loop ' + &
& clause_val + extraAccArgs_val.strip('\n')
#:set acc_end_directive = '!$acc end parallel loop'
$:acc_directive
$:code
$:acc_end_directive
#:enddef

#:def ACC_ROUTINE(function_name=None, parallelism=None, nohost=False, extraAccArgs=None)
Expand Down Expand Up @@ -308,4 +305,4 @@
#:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n')
$:acc_directive
#:enddef
! New line at end of file is required for FYPP
! New line at end of file is required for FYPP
21 changes: 15 additions & 6 deletions src/common/include/omp_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@
$:omp_end_directive
#:enddef

#:def OMP_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', &
#:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
& no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None)
Expand Down Expand Up @@ -178,21 +178,30 @@

#:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
#:set omp_end_directive = '!$omp end target teams loop'
#:elif MFC_COMPILER == CCE_COMPILER_ID
#:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) '
#:set omp_end_directive = '!$omp end target teams distribute parallel do simd'
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:set omp_start_directive = '!$omp target teams distribute parallel do '
#:set omp_end_directive = '!$omp end target teams distribute parallel do'
#:else
#:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
#:set omp_end_directive = '!$omp end target teams loop'
#:endif

#:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n')
$:omp_directive
$:code
#:enddef

#:def END_OMP_PARALLEL_LOOP()
#! Emits the OpenMP directive that closes a parallel loop region.
#! Takes no arguments; the closing text is picked from MFC_COMPILER.
#! NOTE(review): the branches below are expected to pair with the start
#! directive chosen per compiler in OMP_PARALLEL_LOOP -- keep both in sync.

#:if MFC_COMPILER in (NVIDIA_COMPILER_ID, PGI_COMPILER_ID)
#:set closing_directive = '!$omp end target teams loop'
#:elif MFC_COMPILER == CCE_COMPILER_ID
#:set closing_directive = '!$omp end target teams distribute parallel do simd'
#:elif MFC_COMPILER == AMD_COMPILER_ID
#:set closing_directive = '!$omp end target teams distribute parallel do'
#:else
#! Any other compiler gets the same closing directive as the NVIDIA/PGI branch.
#:set closing_directive = '!$omp end target teams loop'
#:endif

$:closing_directive
#:enddef

Expand Down
26 changes: 19 additions & 7 deletions src/common/include/parallel_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,33 @@

#:enddef

#:def GPU_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', &
#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
& default='present', firstprivate=None, reduction=None, reductionOp=None, &
& copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
& no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None)

#:set acc_code = ACC_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs)
#:set omp_code = OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs)
#:set acc_directive = ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs)
#:set omp_directive = OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs)

#if defined(MFC_OpenACC)
$:acc_code
$:acc_directive
#elif defined(MFC_OpenMP)
$:omp_code
#else
$:code
$:omp_directive
#endif

#:enddef

#:def END_GPU_PARALLEL_LOOP()
#! Closes a parallel loop region: emits the OpenACC end directive when
#! MFC_OpenACC is defined, otherwise the OpenMP end directive when MFC_OpenMP
#! is defined. The #if/#elif lines below are not Fypp directives, so they are
#! written to the output as-is; with neither symbol defined no end directive
#! is selected.

#:set omp_closing = END_OMP_PARALLEL_LOOP()
#:set acc_closing = '!$acc end parallel loop'

#if defined(MFC_OpenACC)
$:acc_closing
#elif defined(MFC_OpenMP)
$:omp_closing
#endif

#:enddef

#:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None)
Expand Down
2 changes: 1 addition & 1 deletion src/common/include/shared_parallel_macros.fpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,4 @@
#:endif
$:extraArgs_val
#:enddef
! New line at end of file is required for FYPP
! New line at end of file is required for FYPP
Loading
Loading