Skip to content

Commit f2e1ca3

Browse files
Merge pull request #1629 from ColomboMatte0/GPU_gibbs_priors
GPU and CPU gibbs priors refactoring
1 parent d4002c5 commit f2e1ca3

22 files changed

+2685
-91
lines changed

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,9 @@ if(NOT DISABLE_STIR_CUDA)
222222
if("${CMAKE_VERSION}" VERSION_LESS "3.23")
223223
message(FATAL_ERROR "CMake 3.23 or newer is required to use CMAKE_CUDA_ARCHITECTURES set to 'all'. Upgrade your CMake or set DISABLE_STIR_CUDA to ON.")
224224
else()
225-
set(CMAKE_CUDA_ARCHITECTURES "all")
225+
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
226+
set(CMAKE_CUDA_ARCHITECTURES "all")
227+
endif()
226228
endif()
227229
message(STATUS "STIR CUDA support enabled. Using CUDA version ${CUDAToolkit_VERSION}.")
228230
set(STIR_WITH_CUDA ON)

documentation/release_6.3.htm

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ <h4>General</h4>
4646
is disabled for backwards compatibility.<br>
4747
<a href=https://github.com/UCL/STIR/pull/1291>PR #1291</a>
4848
</li>
49+
<li>
50+
The priors code has been refactored to provide two common, parallelized base classes for CPU and GPU implementations (<code>GibbsPenalty</code> and <code>CudaGibbsPenalty</code>).<br>
51+
On CPU, we introduce <code>GibbsQuadraticPenalty</code> and <code>GibbsRelativeDifferencePenalty</code>, which inherit from the parallelized <code>GibbsPenalty</code> base class (now parallelized using OpenMP).<br>
52+
GPU implementations are also provided as <code>CudaGibbsQuadraticPenalty</code> and <code>CudaGibbsRelativeDifferencePenalty</code>.<br>
53+
Currently, the new Gibbs penalties are still missing the paraboloidal surrogates related method implementations. They provide two new methods: <code>compute_gradient_times_input</code> and <code>compute_Hessian_diagonal</code>.<br>
54+
<a href="https://github.com/UCL/STIR/pull/1629">PR #1629</a>
55+
</li>
4956
<li>
5057
Data from GE Discovery MI systems in RDF9 should now be readable. TOF information on these scanners has also been
5158
added.
@@ -245,7 +252,7 @@ <h3>Bug fixes</h3>
245252
</li>
246253
</ul>
247254

248-
<h3>New deprecations</h3>
255+
<h3>New Deprecations and renames</h3>
249256
<ul>
250257
<li>
251258
<code>truncate_end_planes</code> will be removed in v7.0
@@ -255,6 +262,11 @@ <h3>New deprecations</h3>
255262
CMake options <tt>STIR_PROJECTORS_AS_V3</tt>, <tt>STIR_LEGACY_IGNORE_VIEW_OFFSET</tt> and
256263
<tt>STIR_ROOT_ROTATION_AS_V4</tt> will therefore be removed.
257264
</li>
265+
<li>
266+
Starting from v7.0, <code>GeneralizedPrior</code> will be renamed to <code>GeneralizedPenalty</code>.<br>
267+
<code>QuadraticPrior</code>, <code>RelativeDifferencePrior</code>, and <code>LogcoshPrior</code> will be removed in favor of the new <code>GibbsPenalty</code> framework.
268+
<a href=https://github.com/UCL/STIR/issues/1426>Issue #1426</a>
269+
</li>
258270
</ul>
259271

260272
<h3>Test changes</h3>

recon_test_pack/OSMAPOSL_test_PMFromFile_QPweights.par

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,11 @@ data_filename:=my_PMRT.pm
3131
End Projection Matrix By Bin From File Parameters:=
3232
End Projector Pair Using Matrix Parameters :=
3333

34-
prior type := quadratic
35-
Quadratic Prior Parameters:=
34+
prior type := Gibbs Quadratic
35+
Gibbs Quadratic Penalty Parameters:=
3636
penalisation factor := .9
3737
weights:={{{0,1,0},{1,0,1},{0,1,0}}}
38-
END Quadratic Prior Parameters:=
38+
END Gibbs Quadratic Penalty Parameters:=
3939

4040

4141
end PoissonLogLikelihoodWithLinearModelForMeanAndProjData Parameters:=

recon_test_pack/OSMAPOSL_test_PM_QP.par

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ projector pair type := Matrix
2222
End Ray tracing matrix parameters :=
2323
End Projector Pair Using Matrix Parameters :=
2424

25-
prior type := quadratic
26-
Quadratic Prior Parameters:=
25+
prior type := Gibbs Quadratic
26+
Gibbs Quadratic Penalty Parameters:=
2727
penalisation factor := 0.5
2828
; next defaults to 0, set to 1 for 2D inverse Euclidean weights, 0 for 3D
2929
only 2D:= 0
30-
END Quadratic Prior Parameters:=
30+
END Gibbs Quadratic Penalty Parameters:=
3131

3232
end PoissonLogLikelihoodWithLinearModelForMeanAndProjData Parameters:=
3333

recon_test_pack/OSMAPOSL_test_PM_QPweights.par

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ projector pair type := Matrix
2020
End Ray tracing matrix parameters :=
2121
End Projector Pair Using Matrix Parameters :=
2222

23-
prior type := quadratic
24-
Quadratic Prior Parameters:=
23+
prior type := Gibbs Quadratic
24+
Gibbs Quadratic Penalty Parameters:=
2525
penalisation factor := .9
2626
weights:={{{0,1,0},{1,0,1},{0,1,0}}}
27-
END Quadratic Prior Parameters:=
27+
END Gibbs Quadratic Penalty Parameters:=
2828

2929

3030
end PoissonLogLikelihoodWithLinearModelForMeanAndProjData Parameters:=

src/include/stir/cuda_utilities.h

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*
22
Copyright (C) 2024, University College London
3+
Copyright (C) 2025, University of Milano-Bicocca
34
This file is part of STIR.
45
56
SPDX-License-Identifier: Apache-2.0
@@ -16,13 +17,40 @@
1617
\brief some utilities for STIR and CUDA
1718
1819
\author Kris Thielemans
20+
\author Matteo Neel Colombo
1921
*/
2022
#include "stir/Array.h"
2123
#include "stir/info.h"
24+
#include "stir/error.h"
2225
#include <vector>
2326

2427
START_NAMESPACE_STIR
2528

29+
#ifndef __CUDACC__
30+
# ifndef __host__
31+
# define __host__
32+
# endif
33+
# ifndef __device__
34+
# define __device__
35+
# endif
36+
#endif
37+
38+
#ifndef __CUDACC__
39+
struct cuda_dim3
40+
{
41+
unsigned int x = 1, y = 1, z = 1;
42+
};
43+
struct cuda_int3
44+
{
45+
int x = 0, y = 0, z = 0;
46+
};
47+
#else
48+
# include <cuda_runtime.h>
49+
typedef dim3 cuda_dim3;
50+
typedef int3 cuda_int3;
51+
#endif
52+
53+
#ifdef __CUDACC__
2654
template <int num_dimensions, typename elemT>
2755
inline void
2856
array_to_device(elemT* dev_data, const Array<num_dimensions, elemT>& stir_array)
@@ -64,6 +92,62 @@ array_to_host(Array<num_dimensions, elemT>& stir_array, const elemT* dev_data)
6492
}
6593
}
6694

67-
END_NAMESPACE_STIR
95+
//! \brief Performs a parallel reduction sum on shared memory within a CUDA thread block, final value stored in shared_mem[0].
96+
template <typename elemT>
97+
__device__ inline void
98+
blockReduction(elemT* shared_mem, int thread_in_block, int block_threads)
99+
{
100+
for (int stride = block_threads / 2; stride > 0; stride /= 2)
101+
{
102+
if (thread_in_block < stride)
103+
shared_mem[thread_in_block] += shared_mem[thread_in_block + stride];
104+
__syncthreads();
105+
}
106+
}
107+
108+
//! \brief Provides atomic addition for double values with fallback for pre-Pascal GPU architectures.
109+
template <typename elemT>
110+
__device__ inline double
111+
atomicAddGeneric(double* address, elemT val)
112+
{
113+
# if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600
114+
return atomicAdd(address, static_cast<double>(val));
115+
# else
116+
if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0 && blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0)
117+
{
118+
printf("CudaGibbsPenalty: atomicAdd(double) unsupported on this GPU. "
119+
"Upgrade to compute capability >= 6.0 or check code at "
120+
"sources/STIR/src/include/stir/cuda_utilities.h:108.\n");
121+
asm volatile("trap;");
122+
}
123+
return 0.0; // never reached
124+
// Emulate atomicAdd for double precision on pre-Pascal architectures
125+
// unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
126+
// unsigned long long int old = *address_as_ull, assumed;
68127

128+
// do
129+
// {
130+
// assumed = old;
131+
// double updated = __longlong_as_double(assumed) + dval;
132+
// old = atomicCAS(address_as_ull, assumed, __double_as_longlong(updated));
133+
// } while (assumed != old);
134+
135+
// return __longlong_as_double(old);
136+
# endif
137+
}
138+
139+
//! \brief Utility function to check for CUDA errors and report them with context information.
140+
inline void
141+
checkCudaError(const std::string& operation)
142+
{
143+
cudaError_t cuda_error = cudaGetLastError();
144+
if (cuda_error != cudaSuccess)
145+
{
146+
const char* err = cudaGetErrorString(cuda_error);
147+
error(std::string("CudaGibbsPrior: CUDA error in ") + operation + ": " + err);
148+
}
149+
}
150+
#endif
151+
152+
END_NAMESPACE_STIR
69153
#endif

0 commit comments

Comments
 (0)