From 0a83ab329a14d6d36ca3d8b6ea25d4c528c44730 Mon Sep 17 00:00:00 2001
From: Dmitri Naumov <dmitri.naumov@ufz.de>
Date: Thu, 8 Aug 2024 17:44:41 +0200
Subject: [PATCH] [PL] Storing indices locally is measurably faster

The gain is small: roughly 0.5%, measured on the TRM/Mockup ctest.
---
 .../Assembly/ParallelVectorMatrixAssembler.cpp   | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp
index 3944d02f627..e46377bb59d 100644
--- a/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp
+++ b/ProcessLib/Assembly/ParallelVectorMatrixAssembler.cpp
@@ -33,11 +33,11 @@ void assembleWithJacobianOneElement(
     const NumLib::LocalToGlobalIndexMap& dof_table, const double t,
     const double dt, const GlobalVector& x, const GlobalVector& x_prev,
     std::vector<double>& local_b_data, std::vector<double>& local_Jac_data,
-    std::vector<GlobalIndexType>& indices,
     ProcessLib::AbstractJacobianAssembler& jacobian_assembler,
     ProcessLib::Assembly::MultiMatrixElementCache& cache)
 {
-    indices = NumLib::getIndices(mesh_item_id, dof_table);
+    std::vector<GlobalIndexType> const& indices =
+        NumLib::getIndices(mesh_item_id, dof_table);
 
     local_b_data.clear();
     local_Jac_data.clear();
@@ -95,7 +95,6 @@ void runAssemblyForEachLocalAssembler(
     NumLib::LocalToGlobalIndexMap const& dof_table, double const t,
     double const dt, GlobalVector const& x, GlobalVector const& x_prev,
     std::vector<double>& local_b_data, std::vector<double>& local_Jac_data,
-    std::vector<GlobalIndexType>& indices,
     ProcessLib::AbstractJacobianAssembler& jac_asm, ThreadException& exception,
     ProcessLib::Assembly::MultiMatrixElementCache& cache,
     auto local_matrix_output)
@@ -116,9 +115,9 @@ void runAssemblyForEachLocalAssembler(
 
         try
         {
-            assembleWithJacobianOneElement(
-                element_id, loc_asm, dof_table, t, dt, x, x_prev, local_b_data,
-                local_Jac_data, indices, jac_asm, cache);
+            assembleWithJacobianOneElement(element_id, loc_asm, dof_table, t,
+                                           dt, x, x_prev, local_b_data,
+                                           local_Jac_data, jac_asm, cache);
         }
         catch (...)
         {
@@ -225,7 +224,6 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian(
         // reallocations.
         std::vector<double> local_b_data;
         std::vector<double> local_Jac_data;
-        std::vector<GlobalIndexType> indices;
 
         // copy to avoid concurrent access
         auto const jac_asm = jacobian_assembler_.copy();
@@ -243,8 +241,8 @@ void ParallelVectorMatrixAssembler::assembleWithJacobian(
         // TODO corner case: what if all elements on a submesh are deactivated?
         runAssemblyForEachLocalAssembler(
             collectActiveLocalAssemblers(local_assemblers, active_elements),
-            dof_table, t, dt, x, x_prev, local_b_data, local_Jac_data, indices,
-            *jac_asm, exception, cache, local_matrix_output);
+            dof_table, t, dt, x, x_prev, local_b_data, local_Jac_data, *jac_asm,
+            exception, cache, local_matrix_output);
     }
 
     stats->print();
-- 
GitLab