Speedup CombineDofs (#63)

pjaap · chmerdon · web-flow · commit af2c83af9bd0 · 2025-06-04T18:32:28.000+02:00
Co-authored-by: Christian Merdon <merdon@wias-berlin.de>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,19 +1,20 @@
 # CHANGES
 
-
-## v1.2.0 May 28, 2025
+## v1.2.0 June 4, 2025
 
 ### Changed
   - TimerOutputs for measuring/storing/showing runtime and allocations in solve, now also for separate operators
-  
+  - Coupling matrix result of `get_periodic_coupling_matrix` is no longer transposed
+  - rewrote internals of `CombineDofs` `apply_penalties!` method to speed up the assembly
+
 ### Fixed
   - HomogeneousData/InterpolateBoundaryData operator fix when system matrix is of type GenericMTExtendableSparseMatrixCSC
-  
+
 ## v1.1.1 April 29, 2025
 
 ### Fixed
   - FixDofs operator does not crash when system matrix is of type GenericMTExtendableSparseMatrixCSC
-  
+
 ## v1.1.0 April 17, 2025
 
 ### Changed
diff --git a/src/ExtendableFEM.jl b/src/ExtendableFEM.jl
@@ -57,7 +57,7 @@ using ExtendableGrids: ExtendableGrids, AT_NODES, AbstractElementGeometry,
     unique, update_trafo!, xrefFACE2xrefCELL,
     xrefFACE2xrefOFACE
 using ExtendableSparse: ExtendableSparse, ExtendableSparseMatrix, flush!,
-    MTExtendableSparseMatrixCSC,
+    MTExtendableSparseMatrixCSC, findindex,
     rawupdateindex!
 using ForwardDiff: ForwardDiff
 using GridVisualize: GridVisualize, GridVisualizer, gridplot!, reveal, save,
@@ -75,7 +75,6 @@ using SciMLBase: SciMLBase
 using TimerOutputs: TimerOutput, print_timer, @timeit
 using UnicodePlots: UnicodePlots
 
-
 ## reexport stuff from ExtendableFEMBase and ExtendableGrids
 export FESpace, FEMatrix, FEVector
 export H1P1, H1P2, H1P3, H1Pk
diff --git a/src/common_operators/combinedofs.jl b/src/common_operators/combinedofs.jl
@@ -2,16 +2,19 @@
 ### COMBINE DOFS (e.g. for periodicity) ###
 ###########################################
 
-mutable struct CombineDofs{UT, CT} <: AbstractOperator
+mutable struct CombineDofs{UT, CT, AT} <: AbstractOperator
     uX::UT                  # component nr for dofsX
     uY::UT                  # component nr for dofsY
     coupling_info::CT
+    fixed_dofs::AT
     FESX::Any
     FESY::Any
     assembler::Any
     parameters::Dict{Symbol, Any}
 end
 
+fixed_dofs(O::CombineDofs) = O.fixed_dofs
+
 default_combop_kwargs() = Dict{Symbol, Tuple{Any, String}}(
     :name => ("CombineDofs", "name for operator used in printouts"),
     :penalty => (1.0e30, "penalty for fixed degrees of freedom"),
@@ -61,7 +64,14 @@ $(_myprint(default_combop_kwargs()))
 function CombineDofs(uX, uY, coupling_matrix::AbstractMatrix; kwargs...)
     parameters = Dict{Symbol, Any}(k => v[1] for (k, v) in default_combop_kwargs())
     _update_params!(parameters, kwargs)
-    return CombineDofs(uX, uY, coupling_matrix, nothing, nothing, nothing, parameters)
+    fixed_dofs = zeros(Int, 0)
+    for dof_i in 1:size(coupling_matrix, 2)
+        coupling_i = @views coupling_matrix[:, dof_i]
+        if nnz(coupling_i) > 0
+            push!(fixed_dofs, dof_i)
+        end
+    end
+    return CombineDofs(uX, uY, coupling_matrix, fixed_dofs, nothing, nothing, nothing, parameters)
 end
 
 function apply_penalties!(A, b, sol, CD::CombineDofs{UT, CT}, SC::SolverConfiguration; assemble_matrix = true, assemble_rhs = true, kwargs...) where {UT, CT}
@@ -80,118 +90,81 @@ function build_assembler!(CD::CombineDofs{UT, CT}, FE::Array{<:FEVectorBlock, 1}
     FESX, FESY = FE[1].FES, FE[2].FES
     if (CD.FESX != FESX) || (CD.FESY != FESY)
         coupling_matrix = CD.coupling_info
+        fixed_dofs = CD.fixed_dofs
         offsetX = FE[1].offset
         offsetY = FE[2].offset
         if CD.parameters[:verbosity] > 0
             @info ".... coupling $(length(coupling_matrix.nzval)) dofs"
         end
-        function assemble!(A::AbstractSparseArray{T}, b::AbstractVector{T}, assemble_matrix::Bool, assemble_rhs::Bool, kwargs...) where {T}
-
-            # transpose the matrix once for efficient row access
-            transposed_coupling_matrix = sparse(transpose(coupling_matrix))
+        penalty = CD.parameters[:penalty]
 
+        function assemble!(A::AbstractSparseArray{T}, b::AbstractVector{T}, assemble_matrix::Bool, assemble_rhs::Bool, kwargs...) where {T}
             if assemble_matrix
-                # go through each coupled dof and update the FE adjacency info
-                # from the constrained dofs here
-
-                for dof_i in 1:size(coupling_matrix, 2)
+                # go through each constrained dof and update the FE adjacency info
+                # of the coupled dofs
+                for dof_i in fixed_dofs
                     # this col-view is efficient
                     coupling_i = @views coupling_matrix[:, dof_i]
-                    # do nothing if dof_k is not coupled to any constrained dof
-                    if nnz(coupling_i) == 0
-                        continue
-                    end
 
                     # row of the constrained dof; its FE adjacency is added to the rows of the coupled dofs
-                    targetrow = dof_i + offsetX
+                    sourcerow = dof_i + offsetX
 
                     # extract the coupled dofs and the weights
                     coupled_dofs_i, weights_i = findnz(coupling_i)
 
-                    # parse through all cols and update the entries
-                    for dof_j in 1:size(coupling_matrix, 2)
-                        # this col-view is efficient
-                        coupling_j = @views coupling_matrix[:, dof_j]
-
-                        # if both dof_i and dof_j are coupled to a constrained dof, then
-                        # the FE adjacency A_ij is not updated: this is covered by the linear combinations
-                        # expressed in the rows of the constrained dofs_on_boundary
-                        # Hence, check that dof_j is not coupled to anything
-                        if nnz(coupling_j) == 0
-                            targetcol = dof_j + offsetY
-                            for (dof_k, weight_ik) in zip(coupled_dofs_i, weights_i)
-                                sourcerow = dof_k + offsetX
-                                sourcecol = targetcol
-                                val = A[sourcerow, sourcecol]
-                                _addnz(A, targetrow, targetcol, val, weight_ik)
+                    # parse through sourcerow and add the contents to the coupled dofs
+                    for col in 1:size(A, 2)
+                        r = findindex(A.cscmatrix, sourcerow, col)
+                        if r > 0
+                            val = A.cscmatrix.nzval[r]
+                            if abs(val) > 1.0e-15
+                                for (dof_k, weight_ik) in zip(coupled_dofs_i, weights_i)
+                                    targetrow = dof_k + offsetX
+                                    _addnz(A, targetrow, col, val, weight_ik)
+                                end
                             end
                         end
                     end
                 end
 
                 # replace the geometric coupling rows based
                 # on the original coupling matrix
-                for dof_i in 1:size(transposed_coupling_matrix, 2)
-
-                    coupling_i = transposed_coupling_matrix[:, dof_i]
-                    # do nothing if no coupling for dof_i
-                    if nnz(coupling_i) == 0
-                        continue
-                    end
+                for dof_i in fixed_dofs
+                    coupling_i = coupling_matrix[:, dof_i]
 
                     # get the coupled dofs of dof_i and the corresponding weights
                     coupled_dofs_i, weights_i = findnz(coupling_i)
-
                     sourcerow = dof_i + offsetX
 
-                    # eliminate the sourcerow
-                    for col in 1:size(A, 2)
-                        A[sourcerow, col] = 0
-                    end
-
                     # replace sourcerow with coupling linear combination
-                    _addnz(A, sourcerow, sourcerow, -1.0, 1)
+                    _addnz(A, sourcerow, sourcerow, -1.0, penalty)
                     for (dof_j, weight_ij) in zip(coupled_dofs_i, weights_i)
                         # weights for ∑ⱼ wⱼdofⱼ - dofᵢ = 0
-                        _addnz(A, sourcerow, dof_j + offsetY, weight_ij, 1)
+                        _addnz(A, sourcerow, dof_j + offsetY, weight_ij, penalty)
                     end
-
                 end
                 flush!(A)
             end
 
             if assemble_rhs
-
-                for dof_i in 1:size(coupling_matrix, 2)
+                for dof_i in fixed_dofs
                     # this col-view is efficient
                     coupling_i = @views coupling_matrix[:, dof_i]
-                    # do nothing if no coupling for dof_i
-                    if nnz(coupling_i) == 0
-                        continue
-                    end
 
                     # get the coupled dofs of dof_i and the corresponding weights
                     coupled_dofs, weights = findnz(coupling_i)
 
                     # transfer the assembly information of dof_i to its coupled dofs
-                    targetrow = dof_i + offsetY
+                    sourcerow = dof_i + offsetY
                     for (dof_j, weight) in zip(coupled_dofs, weights)
-                        sourcerow = dof_j + offsetY
+                        targetrow = dof_j + offsetY
                         b[targetrow] += weight * b[sourcerow]
                     end
                 end
 
-
                 # now set the rows of the constrained dofs to zero to enforce the linear combination
-                for dof_i in 1:size(transposed_coupling_matrix, 2)
-                    coupling_i = transposed_coupling_matrix[:, dof_i]
-                    # do nothing if no coupling for dof_i
-                    if nnz(coupling_i) == 0
-                        continue
-                    end
-
+                for dof_i in fixed_dofs
                     b[dof_i + offsetX] = 0.0
-
                 end
             end
 
diff --git a/src/helper_functions.jl b/src/helper_functions.jl
@@ -396,7 +396,7 @@ function _get_periodic_coupling_matrix(
                 # set entries
                 for (i, target_entry) in enumerate(fe_vector_target.entries)
                     if abs(target_entry) > sparsity_tol
-                        result[i, local_dof] = target_entry
+                        result[local_dof, i] = target_entry
                     end
                 end
             end
@@ -410,7 +410,7 @@ function _get_periodic_coupling_matrix(
         @warn "no coupling found. Are the grid boundary regions and the give_opposite! method correct?"
     end
 
-    return sparse(result)
+    return sp_result
 end
 
 """
@@ -440,10 +440,8 @@ Example: If b_from is at x[1] = 0 and the opposite boundary is at y[1] = 1, then
 The return value is a (𝑛 × 𝑛) sparse matrix 𝐴 (𝑛 is the total number of dofs) containing the periodic coupling information.
 The relationship between the degrees of freedom is dofᵢ = ∑ⱼ Aⱼᵢ ⋅ dofⱼ.
 It is guaranteed that
-    i)  Aⱼᵢ=0 if dofᵢ is 𝑛𝑜𝑡 on the boundary b_from.
-    ii) Aⱼᵢ=0 if the opposite of dofᵢ is not in the same grid cell as dofⱼ.
-Note that A is transposed for efficient col-wise storage.
-
+    i)  Aᵢⱼ=0 if dofᵢ is 𝑛𝑜𝑡 on the boundary b_from.
+    ii) Aᵢⱼ=0 if the opposite of dofᵢ is not in the same grid cell as dofⱼ.
 """
 function get_periodic_coupling_matrix(
         FES,
diff --git a/test/test_helper_functions.jl b/test/test_helper_functions.jl
@@ -54,11 +54,11 @@ function run_test_helper_functions()
             end
         end
 
-        # row sum is 0.0 or 1.0
+        # col sum is 0.0 or 1.0
         for i in 1:size(matrix, 1)
-            row_sum = sum(matrix[i, :])
-            if !(row_sum == 0.0 || row_sum ≈ 1.0)
-                @show row_sum i
+            col_cum = sum(matrix[:, i])
+            if !(col_cum == 0.0 || col_cum ≈ 1.0)
+                @show col_cum i
                 return false
             end
         end
@@ -78,14 +78,14 @@ function run_test_helper_functions()
         let # 3D P1
             xgrid = simplexgrid(0:0.1:1.0, 0:0.1:1.0, 0:0.1:1.0)
             FES = FESpace{H1P1{1}}(xgrid)
-            A = get_periodic_coupling_matrix(FES, xgrid, 4, 2, give_opposite!, sparsity_tol = 1.0e-8)
+            A = get_periodic_coupling_matrix(FES, 4, 2, give_opposite!, sparsity_tol = 1.0e-8)
             @test test_matrix(A)
         end
 
         let # 3D P2 with 2 components
             xgrid = simplexgrid(0:0.5:1.0, 0:0.5:1.0, 0:0.5:1.0)
             FES = FESpace{H1P2{2, 3}}(xgrid)
-            A = get_periodic_coupling_matrix(FES, xgrid, 4, 2, give_opposite!, sparsity_tol = 1.0e-8)
+            A = get_periodic_coupling_matrix(FES, 4, 2, give_opposite!, sparsity_tol = 1.0e-8)
             @test test_matrix(A)
         end
 
@@ -98,7 +98,7 @@ function run_test_helper_functions()
             xgrid = simplexgrid(b)
 
             FES = FESpace{H1P1{1}}(xgrid)
-            A = get_periodic_coupling_matrix(FES, xgrid, 4, 2, give_opposite!, sparsity_tol = 1.0e-8)
+            A = get_periodic_coupling_matrix(FES, 4, 2, give_opposite!, sparsity_tol = 1.0e-8)
             @test test_matrix(A; structured_grid = false)
         end
     end