
Commit fc1e5fb

Block Jacobi (#69)
1 parent c573e9a commit fc1e5fb
6 files changed: +142 -9 lines changed

examples/degeus_mechanics/mech.i

Lines changed: 6 additions & 2 deletions
@@ -7,7 +7,7 @@
   ymax = ${fparse 2*pi}
   zmax = ${fparse 2*pi}
   mesh_mode = DUMMY
-  device_names = cuda
+  device_names = cpu
 []

 [TensorComputes]
@@ -72,7 +72,11 @@
     constitutive_model = hyper_elasticity
     stress = stress
    applied_macroscopic_strain = applied_strain
-    hutchinson_steps = 6
+    # hutchinson_steps = 64
+    # jacobi_min_rel = 1e-2
+    # jacobi_inv_cap = 1e4
+    block_jacobi = true
+    block_jacobi_damp=1e-1
     verbose = true
   []
 []
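
The example above switches from the stochastic diagonal preconditioner to the new block-Jacobi option. As a hedged summary in my own notation (not taken verbatim from the commit), the block-Jacobi preconditioner regularizes and inverts the local fourth-order tangent at every grid point, applies the resulting compliance to the residual, and subtracts the cell average to remove the k = 0 mode:

\[
\mathbb{K}_\varepsilon(\mathbf{x}) = \mathbb{K}(\mathbf{x}) + \varepsilon\,\mathbb{I},
\qquad
\varepsilon = \texttt{block\_jacobi\_damp}\cdot\overline{\lvert\operatorname{diag}\mathbb{K}\rvert},
\qquad
M^{-1} r(\mathbf{x}) = \mathbb{K}_\varepsilon^{-1}(\mathbf{x}) : r(\mathbf{x}) - \bigl\langle \mathbb{K}_\varepsilon^{-1} : r \bigr\rangle,
\]

where the angle brackets denote the grid average. Note that block_jacobi_damp = 1e-1 in this example is a fairly strong regularization; the parameter defaults to 1e-8.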

include/tensor_buffers/NEML2TensorBuffer.h

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ class NEML2TensorBuffer : public TensorBuffer<T>

   NEML2TensorBuffer(const InputParameters & parameters);

-  virtual void init();
+  virtual void init() override;
   virtual void makeCPUCopy() override;

   using TensorBuffer<T>::_u;

include/tensor_computes/FFTMechanics.h

Lines changed: 10 additions & 0 deletions
@@ -67,6 +67,16 @@ class FFTMechanics : public TensorOperator<>
   /// steps for diagonal estimation
   const unsigned int _hutchinson_steps;

+  /// use block-Jacobi (local compliance) preconditioner
+  const bool _block_jacobi;
+  /// relative damping for block-Jacobi inversion
+  const Real _block_jacobi_damp;
+
+  /// minimum relative floor for Jacobi diagonal (relative to median)
+  const Real _jacobi_min_rel;
+  /// cap on inverse diagonal scaling (0 disables)
+  const Real _jacobi_inv_cap;
+
   /// add diagnostic output for iterations
   const bool _verbose;
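
For the two diagonal-scaling parameters, a hedged reading of the floor and cap applied in FFTMechanics.C below (notation mine): the Hutchinson estimate d_i of the operator diagonal is floored relative to the mean \(\bar d\) of the estimates above \(10^{-9}\), and the resulting inverse scaling is optionally capped,

\[
\tilde d_i = \max\bigl(d_i,\ \texttt{jacobi\_min\_rel}\cdot\bar d\bigr),
\qquad
(M^{-1})_{ii} = \min\bigl(1/\tilde d_i,\ \texttt{jacobi\_inv\_cap}\bigr),
\]

with the cap applied only when jacobi_inv_cap > 0.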

include/utils/SwiftUtils.h

Lines changed: 10 additions & 0 deletions
@@ -51,6 +51,16 @@ torch::Tensor dot24(const torch::Tensor & A2, const torch::Tensor & B4);
 torch::Tensor dot42(const torch::Tensor & A4, const torch::Tensor & B2);
 torch::Tensor dyad22(const torch::Tensor & A2, const torch::Tensor & B2);

+// Invert local 4th-order blocks (...., i, j, k, l) -> (...., i, j, k, l)
+// Treats each grid point's (i,j)-(k,l) matrix as a (d*d x d*d) block and inverts it in batch.
+// Returns a tensor with the same shape as the input, containing per-point block inverses.
+torch::Tensor invertLocalBlocks(const torch::Tensor & K4);
+
+// Damped inversion of local 4th-order blocks with optional pinv fallback.
+// damp_rel scales an identity added to each (d*d x d*d) block by damp_rel * mean(|diag|).
+// If inversion fails, falls back to pseudo-inverse.
+torch::Tensor invertLocalBlocksDamped(const torch::Tensor & K4, double damp_rel = 1e-8);
+
 torch::Tensor
 estimateJacobiPreconditioner(const std::function<torch::Tensor(const torch::Tensor &)> & A,
                              const torch::Tensor & template_vec,
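
A minimal standalone usage sketch of the new helper (not part of the commit; it assumes SwiftUtils.h and libtorch are available on the include path): random, diagonally dominant local blocks are inverted in batch, and the per-point product with the original blocks is compared against the identity.

// Hedged usage sketch, not part of the commit: verify the batched block
// inverse by checking K * K^{-1} ~ I at every grid point.
#include "SwiftUtils.h" // assumed include path for the MooseTensor utilities
#include <torch/torch.h>
#include <iostream>

int main()
{
  const int64_t nx = 4, ny = 4, d = 3;

  // Random 4th-order blocks, shifted by the rank-four identity so every
  // flattened (d*d x d*d) block is diagonally dominant and invertible.
  auto I4 = torch::eye(d * d).reshape({1, 1, d, d, d, d});
  auto K4 = torch::rand({nx, ny, d, d, d, d}) + 10.0 * I4;

  auto invK4 = MooseTensor::invertLocalBlocksDamped(K4, /*damp_rel=*/1e-8);

  // Flatten to (batch, d*d, d*d) and measure the worst-case deviation of
  // K * K^{-1} from the identity across all grid points.
  auto A = K4.reshape({-1, d * d, d * d});
  auto B = invK4.reshape({-1, d * d, d * d});
  auto err = (torch::bmm(A, B) - torch::eye(d * d)).abs().max();
  std::cout << "max |K K^-1 - I| = " << err.item<double>() << std::endl;
  return 0;
}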

src/tensor_computes/FFTMechanics.C

Lines changed: 67 additions & 6 deletions
@@ -14,6 +14,7 @@
 #include <ATen/core/TensorBody.h>
 #include <ATen/ops/unsqueeze_ops.h>
 #include <util/Optional.h>
+#include <numeric>

 registerMooseObject("SwiftApp", FFTMechanics);

@@ -45,6 +46,18 @@ FFTMechanics::validParams()
       0,
       "Steps for diagonal estimation with Hutchinson's method used in "
       "Jacobi preconditioning. 0 skips preconditioning.");
+  params.addParam<bool>("block_jacobi",
+                        false,
+                        "Use block-Jacobi (local compliance) preconditioner instead of diagonal.");
+  params.addParam<Real>("block_jacobi_damp",
+                        1e-8,
+                        "Relative damping added to local tangent blocks before inversion.");
+  params.addParam<Real>(
+      "jacobi_min_rel",
+      1e-3,
+      "Minimum relative floor for stochastic Jacobi diagonal (relative to median).");
+  params.addParam<Real>(
+      "jacobi_inv_cap", 0.0, "Cap on inverse diagonal scaling; 0 disables clamping.");
   params.addParam<bool>("verbose", false, "Print non-linear residuals.");
   return params;
 }
@@ -74,6 +87,10 @@ FFTMechanics::FFTMechanics(const InputParameters & parameters)
         ? &getInputBuffer("applied_macroscopic_strain")
         : nullptr),
     _hutchinson_steps(getParam<unsigned int>("hutchinson_steps")),
+    _block_jacobi(getParam<bool>("block_jacobi")),
+    _block_jacobi_damp(getParam<Real>("block_jacobi_damp")),
+    _jacobi_min_rel(getParam<Real>("jacobi_min_rel")),
+    _jacobi_inv_cap(getParam<Real>("jacobi_inv_cap")),
     _verbose(getParam<bool>("verbose"))
 {
   // Build projection tensor once
@@ -133,14 +150,58 @@ FFTMechanics::computeBuffer()
   // iterate as long as the iterative update does not vanish
   for (const auto iiter : make_range(_nl_max_its))
   {
-    const auto diag_precond =
-        _hutchinson_steps ? torch::abs(estimateJacobiPreconditioner(G_K_dF, b, _hutchinson_steps))
-                          : torch::ones_like(b);
-    const auto M_inv = [&diag_precond](const torch::Tensor & x) { return x / diag_precond; };
+    c10::optional<torch::Tensor> invK4;
+    const auto diag_estimate =
+        (!_block_jacobi && _hutchinson_steps)
+            ? torch::abs(estimateJacobiPreconditioner(G_K_dF, b, _hutchinson_steps))
+            : torch::ones_like(b);
+    auto inv_diag = torch::ones_like(b);
+    if (!_block_jacobi && _hutchinson_steps)
+    {
+      // Robust floor relative to a nonzero scale to avoid huge inverse scaling
+      auto mask = diag_estimate > 1e-9; // ignore near-zero estimates
+      auto selected = at::masked_select(diag_estimate, mask);
+      auto scale_t = selected.numel() > 0 ? selected.mean() : diag_estimate.mean();
+      auto floor_t = scale_t * _jacobi_min_rel;
+      auto diag_precond = torch::clamp(diag_estimate, floor_t, c10::nullopt);
+      inv_diag = 1.0 / diag_precond;
+      if (_jacobi_inv_cap > 0.0)
+      {
+        inv_diag = torch::clamp(inv_diag, 0.0, _jacobi_inv_cap);
+      }
+    }
+    const auto M_inv = [&](const torch::Tensor & x)
+    {
+      if (_block_jacobi)
+      {
+        if (!invK4.has_value())
+          invK4 = MooseTensor::invertLocalBlocksDamped(_tK4, _block_jacobi_damp);
+        auto x2 = x.reshape(_r2_shape);
+        auto z2raw = MooseTensor::trans2(MooseTensor::ddot42(*invK4, MooseTensor::trans2(x2)));
+        // Enforce zero-mean (remove k=0 mode) without FFT cost
+        std::vector<int64_t> reduce_dims(z2raw.dim() - 2);
+        std::iota(reduce_dims.begin(), reduce_dims.end(), 0);
+        auto mean2 = z2raw.mean(reduce_dims, /*keepdim=*/true);
+        auto z2 = z2raw - mean2;
+        return z2.reshape(-1);
+      }
+      else
+      {
+        auto x2 = x.reshape(_r2_shape);
+        auto z2raw = x2 * inv_diag.reshape(_r2_shape);
+        // Enforce zero-mean (remove k=0 mode) without FFT cost
+        std::vector<int64_t> reduce_dims(z2raw.dim() - 2);
+        std::iota(reduce_dims.begin(), reduce_dims.end(), 0);
+        auto mean2 = z2raw.mean(reduce_dims, /*keepdim=*/true);
+        auto z2 = z2raw - mean2;
+        return z2.reshape(-1);
+      }
+    };

     const auto [dFm_new, iterations, lnorm] =
-        _hutchinson_steps ? conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its, M_inv)
-                          : conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its);
+        (_block_jacobi || _hutchinson_steps)
+            ? conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its, M_inv)
+            : conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its);
     dFm = dFm_new;

     // update DOFs (array -> tens.grid)
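
For readers who want to see where M_inv enters the Krylov solve, here is a generic preconditioned conjugate gradient sketch in libtorch. It is an illustration only; the repository's conjugateGradientSolve may differ in its stopping criterion and in returning the iteration count and final norm.

// Generic preconditioned CG sketch (illustration only, not the repository's
// conjugateGradientSolve). A is the linear operator, M_inv the preconditioner.
#include <torch/torch.h>
#include <functional>

torch::Tensor
pcgSketch(const std::function<torch::Tensor(const torch::Tensor &)> & A,
          const torch::Tensor & b,
          torch::Tensor x,
          const std::function<torch::Tensor(const torch::Tensor &)> & M_inv,
          double tol,
          unsigned int max_its)
{
  auto r = b - A(x); // initial residual
  auto z = M_inv(r); // preconditioned residual
  auto p = z;        // initial search direction
  auto rz = torch::dot(r, z);
  for (unsigned int it = 0; it < max_its && r.norm().item<double>() > tol; ++it)
  {
    const auto Ap = A(p);
    const auto alpha = rz / torch::dot(p, Ap);
    x = x + alpha * p;  // solution update
    r = r - alpha * Ap; // residual update
    z = M_inv(r);       // apply diagonal or block-Jacobi preconditioner
    const auto rz_new = torch::dot(r, z);
    p = z + (rz_new / rz) * p;
    rz = rz_new;
  }
  return x;
}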

src/utils/SwiftUtils.C

Lines changed: 48 additions & 0 deletions
@@ -10,6 +10,9 @@
 #include "SwiftApp.h"
 #include "MooseUtils.h"
 #include "Moose.h"
+// for batched linear algebra
+#include <ATen/ops/linalg_inv.h>
+#include <ATen/ops/linalg_pinv.h>

 namespace MooseTensor
 {
@@ -218,6 +221,51 @@ printBuffer(const torch::Tensor & t, const unsigned int & precision, const unsig
     std::cout << std::endl;
   }
 }
+// Invert local 4th-order blocks (batch of d*d x d*d matrices)
+torch::Tensor
+invertLocalBlocks(const torch::Tensor & K4)
+{
+  // Expect shape: [..., d, d, d, d]
+  const auto d = K4.size(-1);
+  // Flatten last 4 dims to (d*d, d*d) with batch = prod(leading dims)
+  auto K2 = K4.reshape({-1, d * d, d * d});
+  // Batched inverse
+  auto K2_inv = at::linalg_inv(K2);
+  // Restore original shape
+  return K2_inv.reshape(K4.sizes());
+}
+
+torch::Tensor
+invertLocalBlocksDamped(const torch::Tensor & K4, double damp_rel)
+{
+  // Flatten to (batch, n, n)
+  const auto d = K4.size(-1);
+  const auto n = d * d;
+  auto K2 = K4.reshape({-1, n, n});
+
+  // Build batched identity
+  auto I = torch::eye(n, K4.options()).unsqueeze(0).expand({K2.size(0), n, n});
+
+  // Scale damping by mean absolute diagonal across batch
+  auto diag = K2.diagonal(0, -2, -1);
+  double scale = diag.abs().mean().template item<double>();
+  if (!(scale > 0.0))
+    scale = 1.0;
+  const double eps = damp_rel > 0.0 ? damp_rel * scale : 0.0;
+
+  auto K2_reg = eps > 0.0 ? (K2 + eps * I) : K2;
+
+  try
+  {
+    auto K2_inv = at::linalg_inv(K2_reg);
+    return K2_inv.reshape(K4.sizes());
+  }
+  catch (const c10::Error &)
+  {
+    auto K2_pinv = at::linalg_pinv(K2_reg);
+    return K2_pinv.reshape(K4.sizes());
+  }
+}

 // Diagonal estimation with Hutchinson's method
 torch::Tensor
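
For reference, the Hutchinson diagonal estimator that gives estimateJacobiPreconditioner its name has the standard form (stated here from the literature, not read off the implementation, which may use a different probe distribution):

\[
\operatorname{diag}(A)_i \;\approx\; \frac{1}{N}\sum_{k=1}^{N} v^{(k)}_i\,\bigl(A\,v^{(k)}\bigr)_i,
\qquad v^{(k)}_i \sim \text{i.i.d. Rademacher } (\pm 1),
\]

which is unbiased because \(\mathbb{E}[v_i v_j] = \delta_{ij}\); the number of probes N plays the role of the hutchinson_steps parameter.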
