MFlowCode · danieljvickers · Nov 6, 2025 · Nov 6, 2025 · Nov 6, 2025 · Nov 6, 2025
@@ -44,14 +44,16 @@ Note: Ordering is not guaranteed or stable, so use key-value pairing when using
 
 **Macro Invocation**
 
-Uses FYPP eval directive using `#:call`
+To parallelize a loop, place one macro call immediately before the loop and a matching one immediately after it:
 
 ```C
-#:call GPU_PARALLEL_LOOP(...)
+$:GPU_PARALLEL_LOOP(...)
    {code}
-#:endcall GPU_PARALLEL_LOOP
+$:END_GPU_PARALLEL_LOOP()
 ```
 
+This wraps the lines in `code` with parallelization directives for OpenACC or OpenMP, depending on environment and compiler settings.
+
 **Parameters**
 
 | name             | data type           | Default Value     | description                                                                               |

@@ -129,7 +129,7 @@
     $:end_acc_directive
 #:enddef
 
-#:def ACC_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', &
+#:def ACC_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
     & default='present', firstprivate=None, reduction=None, reductionOp=None, &
     & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
     & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None)
@@ -155,10 +155,7 @@
         & deviceptr_val.strip('\n') + attach_val.strip('\n')
     #:set acc_directive = '!$acc parallel loop ' + &
         & clause_val + extraAccArgs_val.strip('\n')
-    #:set acc_end_directive = '!$acc end parallel loop'
     $:acc_directive
-    $:code
-    $:acc_end_directive
 #:enddef
 
 #:def ACC_ROUTINE(function_name=None, parallelism=None, nohost=False, extraAccArgs=None)
@@ -308,4 +305,4 @@
     #:set acc_directive = '!$acc wait ' + clause_val + extraAccArgs_val.strip('\n')
     $:acc_directive
 #:enddef
-! New line at end of file is required for FYPP
+! New line at end of file is required for FYPP
@@ -149,7 +149,7 @@
     $:omp_end_directive
 #:enddef
 
-#:def OMP_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', &
+#:def OMP_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
     & default='present', firstprivate=None, reduction=None, reductionOp=None, &
     & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
     & no_create=None, present=None, deviceptr=None, attach=None, extraOmpArgs=None)
@@ -178,21 +178,30 @@
 
     #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
         #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
-        #:set omp_end_directive = '!$omp end target teams loop'
     #:elif MFC_COMPILER == CCE_COMPILER_ID
         #:set omp_start_directive = '!$omp target teams distribute parallel do simd defaultmap(firstprivate:scalar) '
-        #:set omp_end_directive = '!$omp end target teams distribute parallel do simd'
     #:elif MFC_COMPILER == AMD_COMPILER_ID
         #:set omp_start_directive = '!$omp target teams distribute parallel do '
-        #:set omp_end_directive = '!$omp end target teams distribute parallel do'
     #:else
         #:set omp_start_directive = '!$omp target teams loop defaultmap(firstprivate:scalar) bind(teams,parallel) '
-        #:set omp_end_directive = '!$omp end target teams loop'
     #:endif
 
     #:set omp_directive = omp_start_directive + clause_val + extraOmpArgs_val.strip('\n')
     $:omp_directive
-    $:code
+#:enddef
+
+#:def END_OMP_PARALLEL_LOOP()
+    #! Emits the OpenMP directive that closes a loop opened by OMP_PARALLEL_LOOP.
+    #! Takes no arguments. The end directive is chosen from MFC_COMPILER using the
+    #! same compiler branches as the start directive in OMP_PARALLEL_LOOP, so the
+    #! two branch lists must be kept in sync when either one changes.
+
+    #:if MFC_COMPILER == NVIDIA_COMPILER_ID or MFC_COMPILER == PGI_COMPILER_ID
+        #:set omp_end_directive = '!$omp end target teams loop'
+    #:elif MFC_COMPILER == CCE_COMPILER_ID
+        #:set omp_end_directive = '!$omp end target teams distribute parallel do simd'
+    #:elif MFC_COMPILER == AMD_COMPILER_ID
+        #:set omp_end_directive = '!$omp end target teams distribute parallel do'
+    #:else
+        #! Any other compiler falls back to the same directive as the NVIDIA/PGI branch.
+        #:set omp_end_directive = '!$omp end target teams loop'
+    #:endif
+
+    $:omp_end_directive
 #:enddef
 

@@ -19,21 +19,33 @@
 
 #:enddef
 
-#:def GPU_PARALLEL_LOOP(code, collapse=None, private=None, parallelism='[gang, vector]', &
+#:def GPU_PARALLEL_LOOP(collapse=None, private=None, parallelism='[gang, vector]', &
     & default='present', firstprivate=None, reduction=None, reductionOp=None, &
     & copy=None, copyin=None, copyinReadOnly=None, copyout=None, create=None, &
     & no_create=None, present=None, deviceptr=None, attach=None, extraAccArgs=None, extraOmpArgs=None)
+    #! Emits only the opening parallel-loop directive; the loop body is no longer
+    #! passed in as an argument. The same clause arguments are forwarded to both
+    #! ACC_PARALLEL_LOOP and OMP_PARALLEL_LOOP; only the last argument differs
+    #! (extraAccArgs vs. extraOmpArgs).
+    #! The #if/#elif lines below are not Fypp directives, so they are written to the
+    #! output and select the OpenACC or OpenMP directive via MFC_OpenACC / MFC_OpenMP.
+    #! With neither symbol defined no directive is selected (there is no #else branch).
+    #! Callers are expected to close the loop with END_GPU_PARALLEL_LOOP().
 
-    #:set acc_code = ACC_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs)
-    #:set omp_code = OMP_PARALLEL_LOOP(code, collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs)
+    #:set acc_directive = ACC_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraAccArgs)
+    #:set omp_directive = OMP_PARALLEL_LOOP(collapse, private, parallelism, default, firstprivate, reduction, reductionOp, copy, copyin, copyinReadOnly, copyout, create, no_create, present, deviceptr, attach, extraOmpArgs)
 
 #if defined(MFC_OpenACC)
-    $:acc_code
+    $:acc_directive
 #elif defined(MFC_OpenMP)
-    $:omp_code
-#else
-    $:code
+    $:omp_directive
 #endif
+
+#:enddef
+
+#:def END_GPU_PARALLEL_LOOP()
+    #! Emits the directive that closes a loop opened with GPU_PARALLEL_LOOP.
+    #! Takes no arguments. The OpenACC end directive is a fixed string; the OpenMP
+    #! one comes from END_OMP_PARALLEL_LOOP(), which picks it per compiler.
+    #! The #if/#elif lines below are not Fypp directives, so they are written to the
+    #! output and select the end directive via MFC_OpenACC / MFC_OpenMP.
+    #! With neither symbol defined no directive is selected (there is no #else branch).
+
+    #:set acc_end_directive = '!$acc end parallel loop'
+    #:set omp_end_directive = END_OMP_PARALLEL_LOOP()
+
+#if defined(MFC_OpenACC)
+    $:acc_end_directive
+#elif defined(MFC_OpenMP)
+    $:omp_end_directive
+#endif
+
 #:enddef
 
 #:def GPU_ROUTINE(function_name=None, parallelism=None, nohost=False, cray_inline=False, extraAccArgs=None, extraOmpArgs=None)

@@ -107,4 +107,4 @@
     #:endif
     $:extraArgs_val
 #:enddef
-! New line at end of file is required for FYPP
+! New line at end of file is required for FYPP