diff --git a/examples/degeus_mechanics/mech.i b/examples/degeus_mechanics/mech.i index 94bef6fb..8d86e401 100644 --- a/examples/degeus_mechanics/mech.i +++ b/examples/degeus_mechanics/mech.i @@ -7,13 +7,20 @@ ymax = ${fparse 2*pi} zmax = ${fparse 2*pi} mesh_mode = DUMMY + device_names = cpu [] [TensorComputes] [Initialize] [Finit] - type = RankTwoIdentity + type = RankTwoDiagonalTensor buffer = F + value = 1 + [] + [Stressinit] + type = RankTwoDiagonalTensor + buffer = stress + value = 0 [] [phase] @@ -58,12 +65,19 @@ F = F K = K mu = mu - l_tol = 1e-2 - nl_rel_tol = 2e-2 + l_tol = 1e-3 + l_max_its = 200 + nl_rel_tol = 2e-3 nl_abs_tol = 2e-2 constitutive_model = hyper_elasticity stress = stress applied_macroscopic_strain = applied_strain + # hutchinson_steps = 64 + # jacobi_min_rel = 1e-2 + # jacobi_inv_cap = 1e4 + block_jacobi = true + block_jacobi_damp=1e-1 + verbose = true [] [] [] @@ -88,7 +102,7 @@ # deformation tensor is just forwarded Fnew -> F forward_buffer = F forward_buffer_new = Fnew - substeps = 10 + substeps = 1 [] [TensorOutputs] @@ -105,3 +119,8 @@ num_steps = 100 dt = 0.01 [] + +[Outputs] + perf_graph = true + console = true +[] diff --git a/include/tensor_buffers/NEML2TensorBuffer.h b/include/tensor_buffers/NEML2TensorBuffer.h index e50ecae2..2a4ea075 100644 --- a/include/tensor_buffers/NEML2TensorBuffer.h +++ b/include/tensor_buffers/NEML2TensorBuffer.h @@ -21,7 +21,7 @@ class NEML2TensorBuffer : public TensorBuffer NEML2TensorBuffer(const InputParameters & parameters); - virtual void init(); + virtual void init() override; virtual void makeCPUCopy() override; using TensorBuffer::_u; diff --git a/include/tensor_computes/FFTMechanics.h b/include/tensor_computes/FFTMechanics.h index 7c93b4c9..efde660c 100644 --- a/include/tensor_computes/FFTMechanics.h +++ b/include/tensor_computes/FFTMechanics.h @@ -64,6 +64,20 @@ class FFTMechanics : public TensorOperator<> /// applied macroscopic (affine) strain const torch::Tensor * const _applied_macroscopic_strain; + /// steps for diagonal estimation + const unsigned int _hutchinson_steps; + + /// use block-Jacobi (local compliance) preconditioner + const bool _block_jacobi; + /// relative damping for block-Jacobi inversion + const Real _block_jacobi_damp; + + /// minimum relative floor for Jacobi diagonal (relative to median) + const Real _jacobi_min_rel; + /// cap on inverse diagonal scaling (0 disables) + const Real _jacobi_inv_cap; + + /// add diagnostic output for iterations const bool _verbose; using TensorOperatorBase::_dim; diff --git a/include/tensor_computes/RankTwoIdentity.h b/include/tensor_computes/RankTwoDiagonalTensor.h similarity index 81% rename from include/tensor_computes/RankTwoIdentity.h rename to include/tensor_computes/RankTwoDiagonalTensor.h index 6503013c..15e547cb 100644 --- a/include/tensor_computes/RankTwoIdentity.h +++ b/include/tensor_computes/RankTwoDiagonalTensor.h @@ -13,15 +13,18 @@ /** * Identity rank two tensor Tensor */ -class RankTwoIdentity : public TensorOperator<> +class RankTwoDiagonalTensor : public TensorOperator<> { public: static InputParameters validParams(); - RankTwoIdentity(const InputParameters & parameters); + RankTwoDiagonalTensor(const InputParameters & parameters); virtual void computeBuffer() override; /// mesh dimension const unsigned int & _dim; + + /// value on the diagonal + const Real & _value; }; diff --git a/include/utils/SwiftUtils.h b/include/utils/SwiftUtils.h index ef85819e..71f19930 100644 --- a/include/utils/SwiftUtils.h +++ b/include/utils/SwiftUtils.h @@ -51,75 +51,29 @@ torch::Tensor dot24(const torch::Tensor & A2, const torch::Tensor & B4); torch::Tensor dot42(const torch::Tensor & A4, const torch::Tensor & B2); torch::Tensor dyad22(const torch::Tensor & A2, const torch::Tensor & B2); -template -std::tuple -conjugateGradientSolve(T1 A, torch::Tensor b, torch::Tensor x0, double tol, int64_t maxiter, T2 M) -{ - // initialize solution guess - torch::Tensor x = x0.defined() ? x0.clone() : torch::zeros_like(b); - - // norm of b (for relative tolerance) - const double b_norm = torch::norm(b).cpu().template item(); - if (b_norm == 0.0) - // solution is zero if b is zero - return {x, 0u, 0.0}; - - // default max iterations - if (!maxiter) - maxiter = b.numel(); - - // initial residual - torch::Tensor r = b - A(x); - - // Apply preconditioner (or identity) - torch::Tensor z = M(r); // z = M^{-1} r - - // initial search direction p - torch::Tensor p = z.clone(); - - // dot product (r, z) - double rz_old = torch::sum(r * z).cpu().template item(); - - // CG iteration - double res_norm; - for (const auto k : libMesh::make_range(maxiter)) - { - // compute matrix-vector product - const auto Ap = A(p); +// Invert local 4th-order blocks (...., i, j, k, l) -> (...., i, j, k, l) +// Treats each grid point's (i,j)-(k,l) matrix as a (d*d x d*d) block and inverts it in batch. +// Returns a tensor with the same shape as the input, containing per-point block inverses. +torch::Tensor invertLocalBlocks(const torch::Tensor & K4); - // step size alpha - double alpha = rz_old / torch::sum(p * Ap).cpu().template item(); +// Damped inversion of local 4th-order blocks with optional pinv fallback. +// damp_rel scales an identity added to each (d*d x d*d) block by damp_rel * mean(|diag|). +// If inversion fails, falls back to pseudo-inverse. +torch::Tensor invertLocalBlocksDamped(const torch::Tensor & K4, double damp_rel = 1e-8); - // update solution - x = x + alpha * p; +torch::Tensor +estimateJacobiPreconditioner(const std::function & A, + const torch::Tensor & template_vec, + int num_samples = 6); - // update residual - r = r - alpha * Ap; - res_norm = torch::norm(r).cpu().template item(); // ||r|| - - // std::cout << res_norm << '\n'; - - // Converged to desired tolerance - if (res_norm <= tol * b_norm) - return {x, k + 1, res_norm}; - - // apply preconditioner to new residual - z = M(r); - const auto rz_new = torch::sum(r * z).cpu().template item(); - - // update scalar beta - double beta = rz_new / rz_old; - - // update search direction - p = z + beta * p; - - // prepare for next iteration - rz_old = rz_new; - } +std::tuple - // Reached max iterations without full convergence - return {x, maxiter, res_norm}; -} +conjugateGradientSolve(const std::function & A, + torch::Tensor b, + torch::Tensor x0, + double tol, + int64_t maxiter, + const std::function & M); template std::tuple diff --git a/src/tensor_computes/FFTMechanics.C b/src/tensor_computes/FFTMechanics.C index ced49666..84ccae52 100644 --- a/src/tensor_computes/FFTMechanics.C +++ b/src/tensor_computes/FFTMechanics.C @@ -14,6 +14,7 @@ #include #include #include +#include registerMooseObject("SwiftApp", FFTMechanics); @@ -41,6 +42,22 @@ FFTMechanics::validParams() params.addParam("applied_macroscopic_strain", "Applied macroscopic strain"); params.addParam("F", "F", "Deformation gradient tensor."); + params.addParam("hutchinson_steps", + 0, + "Steps for diagonal estimation with Hutchinson's method used in " + "Jacobi preconditioning. 0 skips preconditioning."); + params.addParam("block_jacobi", + false, + "Use block-Jacobi (local compliance) preconditioner instead of diagonal."); + params.addParam("block_jacobi_damp", + 1e-8, + "Relative damping added to local tangent blocks before inversion."); + params.addParam( + "jacobi_min_rel", + 1e-3, + "Minimum relative floor for stochastic Jacobi diagonal (relative to median)."); + params.addParam( + "jacobi_inv_cap", 0.0, "Cap on inverse diagonal scaling; 0 disables clamping."); params.addParam("verbose", false, "Print non-linear residuals."); return params; } @@ -69,6 +86,11 @@ FFTMechanics::FFTMechanics(const InputParameters & parameters) _applied_macroscopic_strain(isParamValid("applied_macroscopic_strain") ? &getInputBuffer("applied_macroscopic_strain") : nullptr), + _hutchinson_steps(getParam("hutchinson_steps")), + _block_jacobi(getParam("block_jacobi")), + _block_jacobi_damp(getParam("block_jacobi_damp")), + _jacobi_min_rel(getParam("jacobi_min_rel")), + _jacobi_inv_cap(getParam("jacobi_inv_cap")), _verbose(getParam("verbose")) { // Build projection tensor once @@ -123,18 +145,67 @@ FFTMechanics::computeBuffer() const auto Fn = at::linalg_norm(_u, c10::nullopt, c10::nullopt, false, c10::nullopt).cpu().item(); - unsigned int iiter = 0; auto dFm = torch::zeros_like(b); // iterate as long as the iterative update does not vanish - while (true) + for (const auto iiter : make_range(_nl_max_its)) { + c10::optional invK4; + const auto diag_estimate = + (!_block_jacobi && _hutchinson_steps) + ? torch::abs(estimateJacobiPreconditioner(G_K_dF, b, _hutchinson_steps)) + : torch::ones_like(b); + auto inv_diag = torch::ones_like(b); + if (!_block_jacobi && _hutchinson_steps) + { + // Robust floor relative to a nonzero scale to avoid huge inverse scaling + auto mask = diag_estimate > 1e-9; // ignore near-zero estimates + auto selected = at::masked_select(diag_estimate, mask); + auto scale_t = selected.numel() > 0 ? selected.mean() : diag_estimate.mean(); + auto floor_t = scale_t * _jacobi_min_rel; + auto diag_precond = torch::clamp(diag_estimate, floor_t, c10::nullopt); + inv_diag = 1.0 / diag_precond; + if (_jacobi_inv_cap > 0.0) + { + inv_diag = torch::clamp(inv_diag, 0.0, _jacobi_inv_cap); + } + } + const auto M_inv = [&](const torch::Tensor & x) + { + if (_block_jacobi) + { + if (!invK4.has_value()) + invK4 = MooseTensor::invertLocalBlocksDamped(_tK4, _block_jacobi_damp); + auto x2 = x.reshape(_r2_shape); + auto z2raw = MooseTensor::trans2(MooseTensor::ddot42(*invK4, MooseTensor::trans2(x2))); + // Enforce zero-mean (remove k=0 mode) without FFT cost + std::vector reduce_dims(z2raw.dim() - 2); + std::iota(reduce_dims.begin(), reduce_dims.end(), 0); + auto mean2 = z2raw.mean(reduce_dims, /*keepdim=*/true); + auto z2 = z2raw - mean2; + return z2.reshape(-1); + } + else + { + auto x2 = x.reshape(_r2_shape); + auto z2raw = x2 * inv_diag.reshape(_r2_shape); + // Enforce zero-mean (remove k=0 mode) without FFT cost + std::vector reduce_dims(z2raw.dim() - 2); + std::iota(reduce_dims.begin(), reduce_dims.end(), 0); + auto mean2 = z2raw.mean(reduce_dims, /*keepdim=*/true); + auto z2 = z2raw - mean2; + return z2.reshape(-1); + } + }; + const auto [dFm_new, iterations, lnorm] = - conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its); + (_block_jacobi || _hutchinson_steps) + ? conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its, M_inv) + : conjugateGradientSolve(G_K_dF, b, dFm, _l_tol, _l_max_its); dFm = dFm_new; // update DOFs (array -> tens.grid) - _u = _u + dFm.reshape(_r2_shape); + _u += dFm.reshape(_r2_shape); // new residual stress and tangent _constitutive_model.computeBuffer(); @@ -148,16 +219,13 @@ FFTMechanics::computeBuffer() // print nonlinear residual to the screen if (_verbose) - _console << "|R|=" << anorm << "\t|R/R0|=" << rnorm << '\n'; + Moose::out << iiter << " |R|=" << anorm << "\t|R/R0|=" << rnorm << std::endl; // check convergence - if ((rnorm < _nl_rel_tol || anorm < _nl_abs_tol) && iiter > 0) - break; - - iiter++; - - if (iiter > _nl_max_its) - paramError("nl_max_its", - "Exceeded the maximum number of nonlinear iterations without converging."); + if (rnorm < _nl_rel_tol || anorm < _nl_abs_tol) + return; } + + paramError("nl_max_its", + "Exceeded the maximum number of nonlinear iterations without converging."); } diff --git a/src/tensor_computes/RandomTensor.C b/src/tensor_computes/RandomTensor.C index 46535bb2..952c3210 100644 --- a/src/tensor_computes/RandomTensor.C +++ b/src/tensor_computes/RandomTensor.C @@ -38,8 +38,9 @@ RandomTensor::computeBuffer() { const auto min = getParam("min"); const auto max = getParam("max"); - if (isParamValid("seed")) - torch::manual_seed(getParam("seed")); + + if (const auto seed = queryParam("seed")) + torch::manual_seed(*seed); if (_generate_on_cpu) { diff --git a/src/tensor_computes/RankTwoIdentity.C b/src/tensor_computes/RankTwoDiagonalTensor.C similarity index 62% rename from src/tensor_computes/RankTwoIdentity.C rename to src/tensor_computes/RankTwoDiagonalTensor.C index fe5f8bed..fbaa7484 100644 --- a/src/tensor_computes/RankTwoIdentity.C +++ b/src/tensor_computes/RankTwoDiagonalTensor.C @@ -6,28 +6,29 @@ /* ALL RIGHTS RESERVED */ /**********************************************************************/ -#include "RankTwoIdentity.h" +#include "RankTwoDiagonalTensor.h" #include "SwiftUtils.h" #include "DomainAction.h" -registerMooseObject("SwiftApp", RankTwoIdentity); +registerMooseObject("SwiftApp", RankTwoDiagonalTensor); InputParameters -RankTwoIdentity::validParams() +RankTwoDiagonalTensor::validParams() { InputParameters params = TensorOperator<>::validParams(); params.addClassDescription("Rank two identity tensor in real space."); + params.addParam("value", 1.0, "Value on the diagonal"); return params; } -RankTwoIdentity::RankTwoIdentity(const InputParameters & parameters) - : TensorOperator(parameters), _dim(_domain.getDim()) +RankTwoDiagonalTensor::RankTwoDiagonalTensor(const InputParameters & parameters) + : TensorOperator(parameters), _dim(_domain.getDim()), _value(getParam("value")) { } void -RankTwoIdentity::computeBuffer() +RankTwoDiagonalTensor::computeBuffer() { const auto full = _domain.getValueShape({_dim, _dim}); - _u = torch::eye(_dim, MooseTensor::floatTensorOptions()).expand(full); + _u = (torch::eye(_dim, MooseTensor::floatTensorOptions()) * _value).expand(full); } diff --git a/src/utils/SwiftUtils.C b/src/utils/SwiftUtils.C index 9d4d9537..cf1196e7 100644 --- a/src/utils/SwiftUtils.C +++ b/src/utils/SwiftUtils.C @@ -10,6 +10,9 @@ #include "SwiftApp.h" #include "MooseUtils.h" #include "Moose.h" +// for batched linear algebra +#include +#include namespace MooseTensor { @@ -218,5 +221,146 @@ printBuffer(const torch::Tensor & t, const unsigned int & precision, const unsig std::cout << std::endl; } } +// Invert local 4th-order blocks (batch of d*d x d*d matrices) +torch::Tensor +invertLocalBlocks(const torch::Tensor & K4) +{ + // Expect shape: [..., d, d, d, d] + const auto d = K4.size(-1); + // Flatten last 4 dims to (d*d, d*d) with batch = prod(leading dims) + auto K2 = K4.reshape({-1, d * d, d * d}); + // Batched inverse + auto K2_inv = at::linalg_inv(K2); + // Restore original shape + return K2_inv.reshape(K4.sizes()); +} + +torch::Tensor +invertLocalBlocksDamped(const torch::Tensor & K4, double damp_rel) +{ + // Flatten to (batch, n, n) + const auto d = K4.size(-1); + const auto n = d * d; + auto K2 = K4.reshape({-1, n, n}); + + // Build batched identity + auto I = torch::eye(n, K4.options()).unsqueeze(0).expand({K2.size(0), n, n}); + + // Scale damping by mean absolute diagonal across batch + auto diag = K2.diagonal(0, -2, -1); + double scale = diag.abs().mean().template item(); + if (!(scale > 0.0)) + scale = 1.0; + const double eps = damp_rel > 0.0 ? damp_rel * scale : 0.0; + + auto K2_reg = eps > 0.0 ? (K2 + eps * I) : K2; + + try + { + auto K2_inv = at::linalg_inv(K2_reg); + return K2_inv.reshape(K4.sizes()); + } + catch (const c10::Error &) + { + auto K2_pinv = at::linalg_pinv(K2_reg); + return K2_pinv.reshape(K4.sizes()); + } +} + +// Diagonal estimation with Hutchinson's method +torch::Tensor +estimateJacobiPreconditioner(const std::function & A, + const torch::Tensor & template_vec, + int num_samples) +{ + torch::Tensor diag_est = torch::zeros_like(template_vec); + + for (int i = 0; i < num_samples; ++i) + { + torch::Tensor v = torch::randint(0, 2, template_vec.sizes(), template_vec.options()) * 2 - 1; + torch::Tensor Av = A(v); + diag_est += v * Av; + } + + diag_est /= num_samples; + + // Avoid division by zero + const torch::Tensor eps = torch::tensor(1e-10, diag_est.options()); + return torch::where(torch::abs(diag_est) < eps, eps, diag_est); +} + +std::tuple +conjugateGradientSolve(const std::function & A, + torch::Tensor b, + torch::Tensor x0, + double tol, + int64_t maxiter, + const std::function & M) +{ + // initialize solution guess + torch::Tensor x = x0.defined() ? x0.clone() : torch::zeros_like(b); + + // norm of b (for relative tolerance) + const double b_norm = torch::norm(b).cpu().template item(); + if (b_norm == 0.0) + // solution is zero if b is zero + return {x, 0u, 0.0}; + + // default max iterations + if (!maxiter) + maxiter = b.numel(); + + // initial residual + torch::Tensor r = b - A(x); + + // Apply preconditioner (or identity) + torch::Tensor z = M(r); // z = M^{-1} r + + // initial search direction p + torch::Tensor p = z.clone(); + + // dot product (r, z) + double rz_old = torch::sum(r * z).cpu().template item(); + + // CG iteration + double res_norm; + for (const auto k : libMesh::make_range(maxiter)) + { + // compute matrix-vector product + const auto Ap = A(p); + + // step size alpha + double alpha = rz_old / torch::sum(p * Ap).cpu().template item(); + + // update solution + x = x + alpha * p; + + // update residual + r = r - alpha * Ap; + res_norm = torch::norm(r).cpu().template item(); // ||r|| + + // std::cout << k << ' '<< res_norm << '\n'; + + // Converged to desired tolerance + if (res_norm <= tol * b_norm) + return {x, k + 1, res_norm}; + + // apply preconditioner to new residual + z = M(r); + const auto rz_new = torch::sum(r * z).cpu().template item(); + + // update scalar beta + double beta = rz_new / rz_old; + + // update search direction + p = z + beta * p; + + // prepare for next iteration + rz_old = rz_new; + } + + // Reached max iterations without full convergence + return {x, maxiter, res_norm}; +} } // namespace MooseTensor diff --git a/test/tests/mechanics/gold/mech3d.h5 b/test/tests/mechanics/gold/mech3d.h5 index fcd62a51..07dd5134 100644 Binary files a/test/tests/mechanics/gold/mech3d.h5 and b/test/tests/mechanics/gold/mech3d.h5 differ diff --git a/test/tests/mechanics/mech.i b/test/tests/mechanics/mech.i index 9a37d7c8..eb95d72d 100644 --- a/test/tests/mechanics/mech.i +++ b/test/tests/mechanics/mech.i @@ -33,7 +33,7 @@ constant_expressions = '0.5 5' [] [Finit] - type = RankTwoIdentity + type = RankTwoDiagonalTensor buffer = F [] [] diff --git a/test/tests/mechanics/mech3d.i b/test/tests/mechanics/mech3d.i index 30e24a44..bc2c6d33 100644 --- a/test/tests/mechanics/mech3d.i +++ b/test/tests/mechanics/mech3d.i @@ -34,7 +34,7 @@ constant_expressions = '0.5 5' [] [Finit] - type = RankTwoIdentity + type = RankTwoDiagonalTensor buffer = F [] []