diff --git a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h
index 840872938749f9c27f202fa8b94c4f8958eee1f5..0072d92359a1f3791f63b6ba4d01bdc50280190d 100644
--- a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h
+++ b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h
@@ -61,8 +61,14 @@ public:
     }
 
     void postTimestepConcreteProcess(GlobalVector const& x,
-                                     int const /*process_id*/) override
+                                     int const process_id) override
     {
+        //For this single process, process_id is always zero.
+        if (process_id != 0)
+        {
+            OGS_FATAL("The condition of process_id = 0 must be satisfied for "
+                      "GroundwaterFlowProcess, which is a single process." );
+        }
         if (_balance_mesh) // computing the balance is optional
         {
             std::vector<double> init_values(
@@ -70,8 +76,6 @@ public:
             MeshLib::addPropertyToMesh(*_balance_mesh, _balance_pv_name,
                                        MeshLib::MeshItemType::Cell, 1,
                                        init_values);
-            //For this single process, process_id is always zero.
-            const int process_id = 0;
             auto balance = ProcessLib::CalculateSurfaceFlux(
                 *_balance_mesh,
                 getProcessVariables(process_id)[0]
diff --git a/ProcessLib/HT/HTProcess.cpp b/ProcessLib/HT/HTProcess.cpp
index 96db34147059bd7b994d4d4b372a3fb2a01af6d2..a4846b8052152f161fe876f4a00e9e113ac25e5f 100644
--- a/ProcessLib/HT/HTProcess.cpp
+++ b/ProcessLib/HT/HTProcess.cpp
@@ -90,7 +90,7 @@ void HTProcess::assembleConcreteProcess(const double t,
     if (_use_monolithic_scheme)
     {
         DBUG("Assemble HTProcess.");
-        dof_tables.emplace_back(std::ref(*_local_to_global_index_map));
+        dof_tables.emplace_back(*_local_to_global_index_map);
     }
     else
     {
@@ -107,8 +107,8 @@ void HTProcess::assembleConcreteProcess(const double t,
                 "fluid flow process within HTProcess.");
         }
         setCoupledSolutionsOfPreviousTimeStep();
-        dof_tables.emplace_back(std::ref(*_local_to_global_index_map));
-        dof_tables.emplace_back(std::ref(*_local_to_global_index_map));
+        dof_tables.emplace_back(*_local_to_global_index_map);
+        dof_tables.emplace_back(*_local_to_global_index_map);
     }
 
     // Call global assembler for each local assembly item.
@@ -219,7 +219,7 @@ void HTProcess::setCoupledSolutionsOfPreviousTimeStep()
             OGS_FATAL(
                 "Memory is not allocated for the global vector "
                 "of the solution of the previous time step for the ."
-                "staggered scheme.\n It can be done by overloading "
+                "staggered scheme.\n It can be done by overriding "
                 "Process::preTimestepConcreteProcess"
                 "(ref. HTProcess::preTimestepConcreteProcess) ");
         }
diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess-impl.h b/ProcessLib/HydroMechanics/HydroMechanicsProcess-impl.h
index c77505de58db96360c712ff6ae14ebe16d8bc483..14aec3eaf0605531b4420ecd885366084a7d051b 100644
--- a/ProcessLib/HydroMechanics/HydroMechanicsProcess-impl.h
+++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess-impl.h
@@ -235,22 +235,22 @@ void HydroMechanicsProcess<DisplacementDim>::initializeBoundaryConditions()
 {
     if (_use_monolithic_scheme)
     {
-        const int process_id_of_up = 0;
-        initializeProcessBoundaryCondition(*_local_to_global_index_map,
-                                           process_id_of_up);
+        const int process_id_of_hydromechancs = 0;
+        initializeProcessBoundaryConditionsAndSourceTerms(
+            *_local_to_global_index_map, process_id_of_hydromechancs);
         return;
     }
 
     // Staggered scheme:
     // for the equations of pressure
-    const int process_id_of_p = 0;
-    initializeProcessBoundaryCondition(
-        *_local_to_global_index_map_with_base_nodes, process_id_of_p);
+    const int hydraulic_process_id = 0;
+    initializeProcessBoundaryConditionsAndSourceTerms(
+        *_local_to_global_index_map_with_base_nodes, hydraulic_process_id);
 
     // for the equations of deformation.
-    const int process_id_of_u = 1;
-    initializeProcessBoundaryCondition(*_local_to_global_index_map,
-                                       process_id_of_u);
+    const int mechanical_process_id = 1;
+    initializeProcessBoundaryConditionsAndSourceTerms(
+        *_local_to_global_index_map, mechanical_process_id);
 }
 
 template <int DisplacementDim>
@@ -289,7 +289,7 @@ void HydroMechanicsProcess<DisplacementDim>::
         DBUG(
             "Assemble the Jacobian of HydroMechanics for the monolithic"
             " scheme.");
-        dof_tables.push_back(std::ref(*_local_to_global_index_map));
+        dof_tables.emplace_back(*_local_to_global_index_map);
     }
     else
     {
@@ -306,9 +306,8 @@ void HydroMechanicsProcess<DisplacementDim>::
                 "Assemble the Jacobian equations of mechanical process in "
                 "HydroMechanics for the staggered scheme.");
         }
-        std::vector<std::reference_wrapper<NumLib::LocalToGlobalIndexMap>>
-            dof_tables = {std::ref(*_local_to_global_index_map_with_base_nodes),
-                          std::ref(*_local_to_global_index_map)};
+        dof_tables.emplace_back(*_local_to_global_index_map_with_base_nodes);
+        dof_tables.emplace_back(*_local_to_global_index_map);
     }
 
     GlobalExecutor::executeMemberDereferenced(
@@ -327,9 +326,7 @@ void HydroMechanicsProcess<DisplacementDim>::preTimestepConcreteProcess(
     _process_data.dt = dt;
     _process_data.t = t;
 
-    // If monolithic scheme is used or the equation of deformation is solved in
-    // the staggered scheme.
-    if (_use_monolithic_scheme || process_id == 1)
+    if (hasMechanicalProcess(process_id))
         GlobalExecutor::executeMemberOnDereferenced(
             &LocalAssemblerInterface::preTimestep, _local_assemblers,
             *_local_to_global_index_map, x, t, dt);
@@ -346,10 +343,10 @@ void HydroMechanicsProcess<DisplacementDim>::postTimestepConcreteProcess(
 }
 
 template <int DisplacementDim>
-void HydroMechanicsProcess<DisplacementDim>::postNonLinearSolverProcess(
+void HydroMechanicsProcess<DisplacementDim>::postNonLinearSolverConcreteProcess(
     GlobalVector const& x, const double t, const int process_id)
 {
-    if (!_use_monolithic_scheme && process_id == 0)
+    if (!hasMechanicalProcess(process_id))
     {
         return;
     }
@@ -374,9 +371,7 @@ template <int DisplacementDim>
 NumLib::LocalToGlobalIndexMap const&
 HydroMechanicsProcess<DisplacementDim>::getDOFTable(const int process_id) const
 {
-    // If monolithic scheme is used or the equation of deformation is solved in
-    // the staggered scheme.
-    if (_use_monolithic_scheme || process_id == 1)
+    if (hasMechanicalProcess(process_id))
     {
         return *_local_to_global_index_map;
     }
diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h
index 97e2593c38e860ad78919eddfca656210d43d1ff..f6ce3e4ced8a2e4e53622f5ee84995c5219b16c6 100644
--- a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h
+++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h
@@ -89,7 +89,7 @@ private:
     void postTimestepConcreteProcess(GlobalVector const& x,
                                      int const process_id) override;
 
-    void postNonLinearSolverProcess(GlobalVector const& x, const double t,
+    void postNonLinearSolverConcreteProcess(GlobalVector const& x, const double t,
                                      int const process_id) override;
 
     NumLib::LocalToGlobalIndexMap const& getDOFTable(
@@ -123,6 +123,13 @@ private:
     std::tuple<NumLib::LocalToGlobalIndexMap*, bool>
         getDOFTableForExtrapolatorData() const override;
 
+    /// Check whether the process represented by \c process_id is/has
+    /// mechanical process. In the present implementation, the mechanical
+    /// process has process_id == 1 in the staggered scheme.
+    bool hasMechanicalProcess(int const process_id) const
+    {
+        return _use_monolithic_scheme || process_id == 1;
+    }
 };
 
 extern template class HydroMechanicsProcess<2>;
diff --git a/ProcessLib/LocalAssemblerInterface.h b/ProcessLib/LocalAssemblerInterface.h
index eda17c0f65a52233e44875213c2af783db54393d..f750b6aa2a71b281b63bc99dabfb8f222f8a6ab8 100644
--- a/ProcessLib/LocalAssemblerInterface.h
+++ b/ProcessLib/LocalAssemblerInterface.h
@@ -78,10 +78,10 @@ public:
                               NumLib::LocalToGlobalIndexMap const& dof_table,
                               GlobalVector const& x);
 
-    virtual void postNonLinearSolver(std::size_t const mesh_item_id,
-                              NumLib::LocalToGlobalIndexMap const& dof_table,
-                              GlobalVector const& x, double const t,
-                              bool const use_monolithic_scheme);
+    void postNonLinearSolver(std::size_t const mesh_item_id,
+                             NumLib::LocalToGlobalIndexMap const& dof_table,
+                             GlobalVector const& x, double const t,
+                             bool const use_monolithic_scheme);
 
     /// Computes the flux in the point \c p_local_coords that is given in local
     /// coordinates using the values from \c local_x.
diff --git a/ProcessLib/PhaseField/PhaseFieldProcess-impl.h b/ProcessLib/PhaseField/PhaseFieldProcess-impl.h
index a12755269c4ee1fdf2266fd412ab2957218cb407..eedd3abc08af808b1f93005b477cc099cb09354a 100644
--- a/ProcessLib/PhaseField/PhaseFieldProcess-impl.h
+++ b/ProcessLib/PhaseField/PhaseFieldProcess-impl.h
@@ -143,11 +143,11 @@ void PhaseFieldProcess<DisplacementDim>::assembleConcreteProcess(
     DBUG("Assemble PhaseFieldProcess.");
 
     std::vector<std::reference_wrapper<NumLib::LocalToGlobalIndexMap>>
-       dof_table = {std::ref(*_local_to_global_index_map)};
+       dof_tables = {std::ref(*_local_to_global_index_map)};
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers,
-        dof_table, t, x, M, K, b, _coupled_solutions);
+        dof_tables, t, x, M, K, b, _coupled_solutions);
 }
 
 template <int DisplacementDim>
@@ -159,11 +159,11 @@ void PhaseFieldProcess<DisplacementDim>::assembleWithJacobianConcreteProcess(
     // DBUG("AssembleJacobian PhaseFieldProcess.");
 
     std::vector<std::reference_wrapper<NumLib::LocalToGlobalIndexMap>>
-       dof_table = {std::ref(*_local_to_global_index_map)};
+       dof_tables = {std::ref(*_local_to_global_index_map)};
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
-        _local_assemblers, dof_table, t, x, xdot, dxdot_dx,
+        _local_assemblers, dof_tables, t, x, xdot, dxdot_dx,
         dx_dx, M, K, b, Jac, _coupled_solutions);
 }
 
diff --git a/ProcessLib/Process.cpp b/ProcessLib/Process.cpp
index be40b175407b338dce319520f89598f0f0c307c2..8e2e0875f2812320d55c33d8a83ae8130be8435c 100644
--- a/ProcessLib/Process.cpp
+++ b/ProcessLib/Process.cpp
@@ -51,7 +51,7 @@ Process::Process(
 {
 }
 
-void Process::initializeProcessBoundaryCondition(
+void Process::initializeProcessBoundaryConditionsAndSourceTerms(
     const NumLib::LocalToGlobalIndexMap& dof_table, const int process_id)
 {
     auto const& per_process_variables = _process_variables[process_id];
@@ -79,7 +79,8 @@ void Process::initializeBoundaryConditions()
     const std::size_t number_of_processes = _process_variables.size();
     for (std::size_t pcs_id = 0; pcs_id < number_of_processes; pcs_id++)
     {
-        initializeProcessBoundaryCondition(*_local_to_global_index_map, pcs_id);
+        initializeProcessBoundaryConditionsAndSourceTerms(
+            *_local_to_global_index_map, pcs_id);
     }
 }
 
@@ -109,7 +110,7 @@ void Process::setInitialConditions(const int process_id, double const t,
                                    GlobalVector& x)
 {
     // getDOFTableOfProcess can be overloaded by the specific process.
-    auto const dof_table_of_process = getDOFTable(process_id);
+    auto const& dof_table_of_process = getDOFTable(process_id);
 
     auto const& per_process_variables = _process_variables[process_id];
     for (std::size_t variable_id = 0;
@@ -286,10 +287,10 @@ Process::getDOFTableForExtrapolatorData() const
     const bool manage_storage = true;
 
     return std::make_tuple(new NumLib::LocalToGlobalIndexMap(
-                std::move(all_mesh_subsets_single_component),
-                // by location order is needed for output
-                NumLib::ComponentOrder::BY_LOCATION),
-            manage_storage);
+                               std::move(all_mesh_subsets_single_component),
+                               // by location order is needed for output
+                               NumLib::ComponentOrder::BY_LOCATION),
+                           manage_storage);
 }
 
 void Process::initializeExtrapolator()
@@ -357,7 +358,7 @@ void Process::postNonLinearSolver(GlobalVector const& x, const double t,
                                   int const process_id)
 {
     MathLib::LinAlg::setLocalAccessibleVector(x);
-    postNonLinearSolverProcess(x, t, process_id);
+    postNonLinearSolverConcreteProcess(x, t, process_id);
 }
 
 void Process::computeSecondaryVariable(const double t, GlobalVector const& x)
diff --git a/ProcessLib/Process.h b/ProcessLib/Process.h
index 92e7e823cf74e4ccd57f6f845f6fa9c34b90ac29..1ffb253dc710e58595214f79f77b4c1e9ca04904 100644
--- a/ProcessLib/Process.h
+++ b/ProcessLib/Process.h
@@ -150,7 +150,7 @@ protected:
      * processes modelled by the monolithic scheme. It is called by
      * initializeBoundaryConditions().
      */
-    void initializeProcessBoundaryCondition(
+    void initializeProcessBoundaryConditionsAndSourceTerms(
         const NumLib::LocalToGlobalIndexMap& dof_table, const int process_id);
 
 private:
@@ -190,9 +190,9 @@ private:
     {
     }
 
-    virtual void postNonLinearSolverProcess(GlobalVector const& /*x*/,
-                                            const double /*t*/,
-                                            int const /*process_id*/)
+    virtual void postNonLinearSolverConcreteProcess(GlobalVector const& /*x*/,
+                                                    const double /*t*/,
+                                                    int const /*process_id*/)
     {
     }
 
diff --git a/ProcessLib/UncoupledProcessesTimeLoop.cpp b/ProcessLib/UncoupledProcessesTimeLoop.cpp
index 479fc1d48d9451d21e84e19684858974fc94167d..334ef56f3e6a17393671efbb6f1b1b09630803d6 100644
--- a/ProcessLib/UncoupledProcessesTimeLoop.cpp
+++ b/ProcessLib/UncoupledProcessesTimeLoop.cpp
@@ -360,6 +360,18 @@ std::unique_ptr<UncoupledProcessesTimeLoop> createUncoupledProcessesTimeLoop(
         //! \ogs_file_param{prj__time_loop__processes}
         config.getConfigSubtree("processes"), processes, nonlinear_solvers);
 
+    if (coupling_config)
+    {
+        if (global_coupling_conv_criteria.size() != per_process_data.size())
+        {
+            OGS_FATAL(
+                "The number of convergence criteria of the global staggered "
+                "coupling loop is not identical to the number of the "
+                "processes! Please check the element by tag "
+                "global_process_coupling in the project file.");
+        }
+    }
+
     const auto minmax_iter = std::minmax_element(
         per_process_data.begin(),
         per_process_data.end(),
@@ -956,11 +968,6 @@ bool UncoupledProcessesTimeLoop::solveCoupledEquationSystemsByStaggeredScheme(
             }
             MathLib::LinAlg::copy(x, x_old);
 
-            if (coupling_iteration_converged && global_coupling_iteration > 0)
-            {
-                break;
-            }
-
             ++process_id;
         }  // end of for (auto& spd : _per_process_data)
 
diff --git a/ProcessLib/VectorMatrixAssembler.cpp b/ProcessLib/VectorMatrixAssembler.cpp
index e81523fec811c83215660902eb09f67477d74311..e5b77645e5207b8da8ec167e0e0153da22e7b481 100644
--- a/ProcessLib/VectorMatrixAssembler.cpp
+++ b/ProcessLib/VectorMatrixAssembler.cpp
@@ -53,7 +53,7 @@ void VectorMatrixAssembler::assemble(
             NumLib::getIndices(mesh_item_id, dof_tables[i].get()));
     }
 
-    auto const& indices = (dof_tables.size() == 1 && cpl_xs == nullptr)
+    auto const& indices = (cpl_xs == nullptr)
                               ? indices_of_processes[0]
                               : indices_of_processes[cpl_xs->process_id];
     _local_M_data.clear();
@@ -120,7 +120,7 @@ void VectorMatrixAssembler::assembleWithJacobian(
             NumLib::getIndices(mesh_item_id, dof_tables[i].get()));
     }
 
-    auto const& indices = (dof_tables.size() == 1 && cpl_xs == nullptr)
+    auto const& indices = (cpl_xs == nullptr)
                               ? indices_of_processes[0]
                               : indices_of_processes[cpl_xs->process_id];
     auto const local_xdot = xdot.get(indices);