|
14 | 14 | #include <ATen/core/TensorBody.h>
|
15 | 15 | #include <ATen/ops/unsqueeze_ops.h>
|
16 | 16 | #include <util/Optional.h>
|
| 17 | +#include <numeric> |
17 | 18 |
|
18 | 19 | registerMooseObject("SwiftApp", FFTMechanics);
|
19 | 20 |
|
@@ -45,6 +46,18 @@ FFTMechanics::validParams()
|
45 | 46 | 0,
|
46 | 47 | "Steps for diagonal estimation with Hutchinson's method used in "
|
47 | 48 | "Jacobi preconditioning. 0 skips preconditioning.");
|
| 49 | + params.addParam<bool>("block_jacobi", |
| 50 | + false, |
| 51 | + "Use block-Jacobi (local compliance) preconditioner instead of diagonal."); |
| 52 | + params.addParam<Real>("block_jacobi_damp", |
| 53 | + 1e-8, |
| 54 | + "Relative damping added to local tangent blocks before inversion."); |
| 55 | + params.addParam<Real>( |
| 56 | + "jacobi_min_rel", |
| 57 | + 1e-3, |
| 58 | + "Minimum relative floor for stochastic Jacobi diagonal (relative to median)."); |
| 59 | + params.addParam<Real>( |
| 60 | + "jacobi_inv_cap", 0.0, "Cap on inverse diagonal scaling; 0 disables clamping."); |
48 | 61 | params.addParam<bool>("verbose", false, "Print non-linear residuals.");
|
49 | 62 | return params;
|
50 | 63 | }
|
@@ -74,6 +87,10 @@ FFTMechanics::FFTMechanics(const InputParameters & parameters)
|
74 | 87 | ? &getInputBuffer("applied_macroscopic_strain")
|
75 | 88 | : nullptr),
|
76 | 89 | _hutchinson_steps(getParam<unsigned int>("hutchinson_steps")),
|
| 90 | + _block_jacobi(getParam<bool>("block_jacobi")), |
| 91 | + _block_jacobi_damp(getParam<Real>("block_jacobi_damp")), |
| 92 | + _jacobi_min_rel(getParam<Real>("jacobi_min_rel")), |
| 93 | + _jacobi_inv_cap(getParam<Real>("jacobi_inv_cap")), |
77 | 94 | _verbose(getParam<bool>("verbose"))
|
78 | 95 | {
|
79 | 96 | // Build projection tensor once
|
@@ -133,14 +150,58 @@ FFTMechanics::computeBuffer()
|
133 | 150 | // iterate as long as the iterative update does not vanish
|
134 | 151 | for (const auto iiter : make_range(_nl_max_its))
|
135 | 152 | {
|
136 |
| - const auto diag_precond = |
137 |
| - _hutchinson_steps ? torch::abs(estimateJacobiPreconditioner(G_K_dF, b, _hutchinson_steps)) |
138 |
| - : torch::ones_like(b); |
139 |
| - const auto M_inv = [&diag_precond](const torch::Tensor & x) { return x / diag_precond; }; |
| 153 | + c10::optional<torch::Tensor> invK4; |
| 154 | + const auto diag_estimate = |
| 155 | + (!_block_jacobi && _hutchinson_steps) |
| 156 | + ? torch::abs(estimateJacobiPreconditioner(G_K_dF, b, _hutchinson_steps)) |
| 157 | + : torch::ones_like(b); |
| 158 | + auto inv_diag = torch::ones_like(b); |
| 159 | + if (!_block_jacobi && _hutchinson_steps) |
| 160 | + { |
| 161 | + // Robust floor relative to a nonzero scale to avoid huge inverse scaling |
| 162 | + auto mask = diag_estimate > 1e-9; // ignore near-zero estimates |
| 163 | + auto selected = at::masked_select(diag_estimate, mask); |
| 164 | + auto scale_t = selected.numel() > 0 ? selected.mean() : diag_estimate.mean(); |
| 165 | + auto floor_t = scale_t * _jacobi_min_rel; |
| 166 | + auto diag_precond = torch::clamp(diag_estimate, floor_t, c10::nullopt); |
| 167 | + inv_diag = 1.0 / diag_precond; |
| 168 | + if (_jacobi_inv_cap > 0.0) |
| 169 | + { |
| 170 | + inv_diag = torch::clamp(inv_diag, 0.0, _jacobi_inv_cap); |
| 171 | + } |
| 172 | + } |
| 173 | + const auto M_inv = [&](const torch::Tensor & x) |
| 174 | + { |
| 175 | + if (_block_jacobi) |
| 176 | + { |
| 177 | + if (!invK4.has_value()) |
| 178 | + invK4 = MooseTensor::invertLocalBlocksDamped(_tK4, _block_jacobi_damp); |
| 179 | + auto x2 = x.reshape(_r2_shape); |
| 180 | + auto z2raw = MooseTensor::trans2(MooseTensor::ddot42(*invK4, MooseTensor::trans2(x2))); |
| 181 | + // Enforce zero-mean (remove k=0 mode) without FFT cost |
| 182 | + std::vector<int64_t> reduce_dims(z2raw.dim() - 2); |
| 183 | + std::iota(reduce_dims.begin(), reduce_dims.end(), 0); |
| 184 | + auto mean2 = z2raw.mean(reduce_dims, /*keepdim=*/true); |
| 185 | + auto z2 = z2raw - mean2; |
| 186 | + return z2.reshape(-1); |
| 187 | + } |
| 188 | + else |
| 189 | + { |
| 190 | + auto x2 = x.reshape(_r2_shape); |
| 191 | + auto z2raw = x2 * inv_diag.reshape(_r2_shape); |
| 192 | + // Enforce zero-mean (remove k=0 mode) without FFT cost |
| 193 | + std::vector<int64_t> reduce_dims(z2raw.dim() - 2); |
| 194 | + std::iota(reduce_dims.begin(), reduce_dims.end(), 0); |
| 195 | + auto mean2 = z2raw.mean(reduce_dims, /*keepdim=*/true); |
| 196 | + auto z2 = z2raw - mean2; |
| 197 | + return z2.reshape(-1); |
| 198 | + } |
| 199 | + }; |
140 | 200 |
|
141 | 201 | const auto [dFm_new, iterations, lnorm] =
|
142 |
| - _hutchinson_steps ? conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its, M_inv) |
143 |
| - : conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its); |
| 202 | + (_block_jacobi || _hutchinson_steps) |
| 203 | + ? conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its, M_inv) |
| 204 | + : conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its); |
144 | 205 | dFm = dFm_new;
|
145 | 206 |
|
146 | 207 | // update DOFs (array -> tens.grid)
|
|
0 commit comments