@@ -1,5 +1,6 @@
 /*
     Copyright (C) 2024, University College London
+    Copyright (C) 2025, University of Milano-Bicocca
     This file is part of STIR.

     SPDX-License-Identifier: Apache-2.0
@@ -16,13 +17,40 @@
   \brief some utilities for STIR and CUDA

   \author Kris Thielemans
+  \author Matteo Neel Colombo
 */
 #include "stir/Array.h"
 #include "stir/info.h"
+#include "stir/error.h"
+#include <string> // for std::string (used by checkCudaError() below)
 #include <vector>

 START_NAMESPACE_STIR

+// When this header is parsed by a plain C++ compiler, stub out the CUDA
+// function-space qualifiers so that annotated declarations still compile.
+#ifndef __CUDACC__
+#  ifndef __host__
+#    define __host__
+#  endif
+#  ifndef __device__
+#    define __device__
+#  endif
+#endif
+
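+//! \brief Example only (hypothetical function, for illustration): thanks to the
+//! stubs above, CUDA qualifiers can appear in code that a plain C++ compiler
+//! also parses.
+inline __host__ __device__ float
+square_example(float x)
+{
+  return x * x;
+}
+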
+// Host-side stand-ins for the CUDA built-in vector types, so that non-CUDA
+// translation units can share signatures; under nvcc they alias the real types.
+#ifndef __CUDACC__
+struct cuda_dim3
+{
+  unsigned int x = 1, y = 1, z = 1;
+};
+struct cuda_int3
+{
+  int x = 0, y = 0, z = 0;
+};
+#else
+#  include <cuda_runtime.h>
+typedef dim3 cuda_dim3;
+typedef int3 cuda_int3;
+#endif
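+
+//! \brief Example only (hypothetical helper): a launch configuration computed in
+//! ordinary C++; this compiles with or without nvcc.
+inline cuda_dim3
+default_block_dim_example()
+{
+  cuda_dim3 block;
+  block.x = block.y = block.z = 8; // 8x8x8 = 512 threads per block
+  return block;
+}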
+
+#ifdef __CUDACC__
 template <int num_dimensions, typename elemT>
 inline void
 array_to_device(elemT* dev_data, const Array<num_dimensions, elemT>& stir_array)
@@ -64,6 +92,62 @@ array_to_host(Array<num_dimensions, elemT>& stir_array, const elemT* dev_data)
     }
 }

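+// A minimal host-side round trip (illustrative sketch; assumes Array::size_all()
+// and omits error checking):
+//
+//   Array<3, float> image = read_image_somehow();
+//   float* dev_ptr = nullptr;
+//   cudaMalloc(&dev_ptr, image.size_all() * sizeof(float));
+//   array_to_device(dev_ptr, image);
+//   // ... launch kernels on dev_ptr ...
+//   array_to_host(image, dev_ptr);
+//   cudaFree(dev_ptr);
+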
-END_NAMESPACE_STIR
+//! \brief Parallel reduction sum on shared memory within a CUDA thread block; the result is left in shared_mem[0].
+//! \details The caller must fill \c shared_mem and call __syncthreads() before
+//! invoking this function; \c block_threads must be a power of two.
+template <typename elemT>
+__device__ inline void
+blockReduction(elemT* shared_mem, int thread_in_block, int block_threads)
+{
+  for (int stride = block_threads / 2; stride > 0; stride /= 2)
+    {
+      if (thread_in_block < stride)
+        shared_mem[thread_in_block] += shared_mem[thread_in_block + stride];
+      __syncthreads();
+    }
+}
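+
+// Usage sketch (illustrative kernel; assumes a power-of-two blockDim.x and a
+// dynamic shared-memory allocation of blockDim.x floats):
+//
+//   __global__ void block_sums(const float* data, float* per_block_sum, int n)
+//   {
+//     extern __shared__ float shared_mem[];
+//     const int tid = threadIdx.x;
+//     const int i = blockIdx.x * blockDim.x + tid;
+//     shared_mem[tid] = (i < n) ? data[i] : 0.f;
+//     __syncthreads();
+//     blockReduction(shared_mem, tid, static_cast<int>(blockDim.x));
+//     if (tid == 0)
+//       per_block_sum[blockIdx.x] = shared_mem[0];
+//   }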
+
+//! \brief Atomic addition on a \c double, guarded for pre-Pascal GPU architectures.
+//! \details Native atomicAdd(double*, double) needs compute capability >= 6.0; on
+//! older devices this aborts the kernel with a diagnostic (a CAS-based emulation
+//! is sketched, disabled, below).
+template <typename elemT>
+__device__ inline double
+atomicAddGeneric(double* address, elemT val)
+{
+#  if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
+  return atomicAdd(address, static_cast<double>(val));
+#  else
+  if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0 && blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0)
+    {
+      printf("CudaGibbsPenalty: atomicAdd(double) unsupported on this GPU. "
+             "Upgrade to compute capability >= 6.0 or check code at "
+             "sources/STIR/src/include/stir/cuda_utilities.h:108.\n");
+      asm volatile("trap;");
+    }
+  return 0.0; // keeps the compiler happy; the trap above aborts the kernel

+  // Emulation of atomicAdd for double precision on pre-Pascal architectures:
+  // unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
+  // unsigned long long int old = *address_as_ull, assumed;
+  // do
+  //   {
+  //     assumed = old;
+  //     const double updated = __longlong_as_double(assumed) + static_cast<double>(val);
+  //     old = atomicCAS(address_as_ull, assumed, __double_as_longlong(updated));
+  //   } while (assumed != old);
+  // return __longlong_as_double(old);
+#  endif
+}
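+
+// Usage sketch (illustrative): after blockReduction() above, thread 0 of each
+// block folds its partial sum into a global accumulator (total is a device double*):
+//
+//   blockReduction(shared_mem, tid, static_cast<int>(blockDim.x));
+//   if (tid == 0)
+//     atomicAddGeneric(total, shared_mem[0]);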
+
+//! \brief Checks for a pending CUDA error and reports it, with context, via stir::error().
+//! \details Note that cudaGetLastError() also clears the error flag.
+inline void
+checkCudaError(const std::string& operation)
+{
+  cudaError_t cuda_error = cudaGetLastError();
+  if (cuda_error != cudaSuccess)
+    {
+      const char* err = cudaGetErrorString(cuda_error);
+      error(std::string("CudaGibbsPrior: CUDA error in ") + operation + ": " + err);
+    }
+}
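+
+// Usage sketch (illustrative; my_kernel, grid, block and dev_ptr are placeholders):
+//
+//   my_kernel<<<grid, block>>>(dev_ptr);
+//   checkCudaError("my_kernel launch");
+//
+// Kernel launches are asynchronous: to catch errors raised during execution,
+// synchronize first (e.g. cudaDeviceSynchronize()) and then check again.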
+#endif
+
+END_NAMESPACE_STIR
 #endif