diff --git a/BaseLib/uniqueInsert.h b/BaseLib/uniqueInsert.h
index 25aa7e2847932863fc6c7087e69fc826ea42b530..d7a7b00040eaaaf874ec86685629218d38090911 100644
--- a/BaseLib/uniqueInsert.h
+++ b/BaseLib/uniqueInsert.h
@@ -15,40 +15,64 @@
 #pragma once
 
 #include <algorithm>
-
+#include <typeinfo>
+#include <typeindex>
 #include "Error.h"
 
-namespace BaseLib {
-
-template<typename Container>
-void uniquePushBack(Container& container, typename Container::value_type const& element)
+namespace BaseLib
+{
+template <typename Container>
+void uniquePushBack(Container& container,
+                    typename Container::value_type const& element)
 {
-    if (std::find(container.begin(), container.end(), element) == container.end())
+    if (std::find(container.begin(), container.end(), element) ==
+        container.end())
         container.push_back(element);
 }
 
+//! Inserts the given \c key with the given \c value into the \c map if an entry
+//! with the
+//! given \c key does not yet exist; otherwise an \c error_message is printed
+//! and the
+//! program is aborted.
+//! Note: The type of \c key must be std::type_index.
+template <typename Map, typename Key, typename Value>
+void insertIfTypeIndexKeyUniqueElseError(Map& map, Key const& key,
+                                         Value&& value,
+                                         std::string const& error_message)
+{
+    auto const inserted = map.emplace(key, std::forward<Value>(value));
+    if (!inserted.second)
+    {  // insertion failed, i.e., key already exists
+        OGS_FATAL("%s Key `%s' already exists.", error_message.c_str(),
+                  tostring(key.hash_code()).c_str());
+    }
+}
 
-//! Inserts the given \c key with the given \c value into the \c map if an entry with the
-//! given \c key does not yet exist; otherwise an \c error_message is printed and the
+//! Inserts the given \c key with the given \c value into the \c map if an entry
+//! with the
+//! given \c key does not yet exist; otherwise an \c error_message is printed
+//! and the
 //! program is aborted.
-template<typename Map, typename Key, typename Value>
-void insertIfKeyUniqueElseError(
-    Map& map, Key const& key, Value&& value,
-    std::string const& error_message)
+template <typename Map, typename Key, typename Value>
+void insertIfKeyUniqueElseError(Map& map, Key const& key, Value&& value,
+                                std::string const& error_message)
 {
     auto const inserted = map.emplace(key, std::forward<Value>(value));
-    if (!inserted.second) { // insertion failed, i.e., key already exists
-        OGS_FATAL("%s Key `%s' already exists.", error_message.c_str(), tostring(key).c_str());
+    if (!inserted.second)
+    {  // insertion failed, i.e., key already exists
+        OGS_FATAL("%s Key `%s' already exists.", error_message.c_str(),
+                  tostring(key).c_str());
     }
 }
 
-//! Inserts the given \c key with the given \c value into the \c map if neither an entry
+//! Inserts the given \c key with the given \c value into the \c map if neither
+//! an entry
 //! with the given \c key nor an entry with the given \c value already exists;
 //! otherwise an \c error_message is printed and the program is aborted.
-template<typename Map, typename Key, typename Value>
-void insertIfKeyValueUniqueElseError(
-    Map& map, Key const& key, Value&& value,
-    std::string const& error_message)
+template <typename Map, typename Key, typename Value>
+void insertIfKeyValueUniqueElseError(Map& map, Key const& key, Value&& value,
+                                     std::string const& error_message)
 {
     auto value_compare = [&value](typename Map::value_type const& elem) {
         return value == elem.second;
@@ -56,45 +80,49 @@ void insertIfKeyValueUniqueElseError(
 
     if (std::find_if(map.cbegin(), map.cend(), value_compare) != map.cend())
     {
-        OGS_FATAL("%s Value `%s' already exists.", error_message.c_str(), tostring(value).c_str());
+        OGS_FATAL("%s Value `%s' already exists.", error_message.c_str(),
+                  tostring(value).c_str());
     }
 
     auto const inserted = map.emplace(key, std::forward<Value>(value));
-    if (!inserted.second) { // insertion failed, i.e., key already exists
-        OGS_FATAL("%s Key `%s' already exists.", error_message.c_str(), tostring(key).c_str());
+    if (!inserted.second)
+    {  // insertion failed, i.e., key already exists
+        OGS_FATAL("%s Key `%s' already exists.", error_message.c_str(),
+                  tostring(key).c_str());
     }
 }
 
 //! Returns the value of \c key from the given \c map if such an entry exists;
 //! otherwise an \c error_message is printed and the program is aborted.
 //! Cf. also the const overload below.
-//! \remark Use as: \code{.cpp} get_or_error<Value>(some_map, some_key, "error message") \endcode
-template<typename Map, typename Key>
-typename Map::mapped_type&
-getOrError(
-    Map& map, Key const& key,
-    std::string const& error_message)
+//! \remark Use as: \code{.cpp} get_or_error<Value>(some_map, some_key, "error
+//! message") \endcode
+template <typename Map, typename Key>
+typename Map::mapped_type& getOrError(Map& map, Key const& key,
+                                      std::string const& error_message)
 {
     auto it = map.find(key);
-    if (it == map.end()) {
-        OGS_FATAL("%s Key `%s' does not exist.", error_message.c_str(), tostring(key).c_str());
+    if (it == map.end())
+    {
+        OGS_FATAL("%s Key `%s' does not exist.", error_message.c_str(),
+                  tostring(key).c_str());
     }
 
     return it->second;
 }
 //! \overload
-template<typename Map, typename Key>
-typename Map::mapped_type const&
-getOrError(
-    Map const& map, Key const& key,
-    std::string const& error_message)
+template <typename Map, typename Key>
+typename Map::mapped_type const& getOrError(Map const& map, Key const& key,
+                                            std::string const& error_message)
 {
     auto it = map.find(key);
-    if (it == map.end()) {
-        OGS_FATAL("%s Key `%s' does not exist.", error_message.c_str(), tostring(key).c_str());
+    if (it == map.end())
+    {
+        OGS_FATAL("%s Key `%s' does not exist.", error_message.c_str(),
+                  tostring(key).c_str());
     }
 
     return it->second;
 }
 
-} // end namespace BaseLib
+}  // end namespace BaseLib
diff --git a/Documentation/ProjectFile/prj/time_loop/global_process_coupling/i_global_process_coupling.md b/Documentation/ProjectFile/prj/time_loop/global_process_coupling/i_global_process_coupling.md
new file mode 100644
index 0000000000000000000000000000000000000000..5263a21d1d8a82be5f52ed369b02b84ab11cfb19
--- /dev/null
+++ b/Documentation/ProjectFile/prj/time_loop/global_process_coupling/i_global_process_coupling.md
@@ -0,0 +1 @@
+Global convergence criteria of the coupling iterations.
\ No newline at end of file
diff --git a/Documentation/ProjectFile/prj/time_loop/global_process_coupling/t_convergence_criterion.md b/Documentation/ProjectFile/prj/time_loop/global_process_coupling/t_convergence_criterion.md
new file mode 100644
index 0000000000000000000000000000000000000000..cc1a434ed44fc6d0350448e5bc465705350a08f2
--- /dev/null
+++ b/Documentation/ProjectFile/prj/time_loop/global_process_coupling/t_convergence_criterion.md
@@ -0,0 +1,2 @@
+Defines the convergence criteria of the global un-coupling loop of the staggered
+ scheme.
diff --git a/Documentation/ProjectFile/prj/time_loop/global_process_coupling/t_max_iter.md b/Documentation/ProjectFile/prj/time_loop/global_process_coupling/t_max_iter.md
new file mode 100644
index 0000000000000000000000000000000000000000..363af52cb64f2648fbdd326e9a3be63606ec851b
--- /dev/null
+++ b/Documentation/ProjectFile/prj/time_loop/global_process_coupling/t_max_iter.md
@@ -0,0 +1 @@
+Defines the maximum iteration number of the coupling loop of the staggered scheme.
diff --git a/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/i_coupled_processes.md b/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/i_coupled_processes.md
new file mode 100644
index 0000000000000000000000000000000000000000..6550f6798a1430360cf2cdd0c1a50dad586cba28
--- /dev/null
+++ b/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/i_coupled_processes.md
@@ -0,0 +1 @@
+Defines the coupled processes to a process.
\ No newline at end of file
diff --git a/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/t_coupled_process.md b/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/t_coupled_process.md
new file mode 100644
index 0000000000000000000000000000000000000000..98b97648e209f0af17fb536111f40f2e6b90adc2
--- /dev/null
+++ b/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/t_coupled_process.md
@@ -0,0 +1 @@
+Specifies a single coupled process to a process by name.
\ No newline at end of file
diff --git a/NumLib/ODESolver/ConvergenceCriterion.h b/NumLib/ODESolver/ConvergenceCriterion.h
index dc9faa5a6de909c37b9b235f32a6538909775702..154b23b4de4c4d7514a4507d1ad2670d45495528 100644
--- a/NumLib/ODESolver/ConvergenceCriterion.h
+++ b/NumLib/ODESolver/ConvergenceCriterion.h
@@ -58,7 +58,11 @@ public:
 
     //! Tell the ConvergenceCriterion that it is called for the first time now
     //! (while solving a specific nonlinear system).
-    virtual void preFirstIteration() {}
+    virtual void preFirstIteration() { _is_first_iteration = true; }
+
+    //! Tell the ConvergenceCriterion that it is not called for the first time
+    //! (while solving a coupling system).
+    virtual void setNoFirstIteration() { _is_first_iteration = false; }
 
     //! Indicate that a new iteration now starts.
     //!
@@ -67,12 +71,16 @@ public:
     //! to be done anew. This method will make the ConvergenceCriterion forget
     //! the result of all checks already done, s.t. all necessary checks will
     //! have to be repeated in order to satisfy the ConvergenceCriterion.
-    virtual void reset() = 0;
+    virtual void reset() { _satisfied = true; _is_first_iteration = false; };
 
     //! Tell if the convergence criterion is satisfied.
-    virtual bool isSatisfied() const = 0;
+    virtual bool isSatisfied() const { return _satisfied; };
 
     virtual ~ConvergenceCriterion() = default;
+
+protected:
+    bool _satisfied = true;
+    bool _is_first_iteration = true;
 };
 
 //! Creates a convergence criterion from the given configuration.
diff --git a/NumLib/ODESolver/ConvergenceCriterionDeltaX.h b/NumLib/ODESolver/ConvergenceCriterionDeltaX.h
index 36fcb9380ef86e8d1f3dd50b4cd3fdfd23081757..75ede7ec830da063c8cb22d4b466c8e17e12a87f 100644
--- a/NumLib/ODESolver/ConvergenceCriterionDeltaX.h
+++ b/NumLib/ODESolver/ConvergenceCriterionDeltaX.h
@@ -36,13 +36,11 @@ public:
                      GlobalVector const& x) override;
     void checkResidual(const GlobalVector& /*residual*/) override {}
 
-    void reset() override { _satisfied = true; }
-    bool isSatisfied() const override { return _satisfied; }
+    void reset() override { this->_satisfied = true; }
 private:
     const boost::optional<double> _abstol;
     const boost::optional<double> _reltol;
     const MathLib::VecNormType _norm_type;
-    bool _satisfied = true;
 };
 
 std::unique_ptr<ConvergenceCriterionDeltaX> createConvergenceCriterionDeltaX(
diff --git a/NumLib/ODESolver/ConvergenceCriterionPerComponentDeltaX.h b/NumLib/ODESolver/ConvergenceCriterionPerComponentDeltaX.h
index 07edfb4b084a74e594cb31bf7d04776dd637dd97..a35ea708527e8c83dc54775637b05e33692b2180 100644
--- a/NumLib/ODESolver/ConvergenceCriterionPerComponentDeltaX.h
+++ b/NumLib/ODESolver/ConvergenceCriterionPerComponentDeltaX.h
@@ -38,8 +38,7 @@ public:
                      GlobalVector const& x) override;
     void checkResidual(const GlobalVector& /*residual*/) override {}
 
-    void reset() override { _satisfied = true; }
-    bool isSatisfied() const override { return _satisfied; }
+    void reset() override { this->_satisfied = true; }
 
     void setDOFTable(const LocalToGlobalIndexMap& dof_table,
                      MeshLib::Mesh const& mesh) override;
@@ -48,7 +47,6 @@ private:
     const std::vector<double> _abstols;
     const std::vector<double> _reltols;
     const MathLib::VecNormType _norm_type;
-    bool _satisfied = true;
     LocalToGlobalIndexMap const* _dof_table = nullptr;
     MeshLib::Mesh const* _mesh = nullptr;
 };
diff --git a/NumLib/ODESolver/ConvergenceCriterionPerComponentResidual.h b/NumLib/ODESolver/ConvergenceCriterionPerComponentResidual.h
index a20a6886a7161184366699289586c05873ec3c25..f54c1a6a932e04919a0d4f4dcc8f5d5f7dd755a2 100644
--- a/NumLib/ODESolver/ConvergenceCriterionPerComponentResidual.h
+++ b/NumLib/ODESolver/ConvergenceCriterionPerComponentResidual.h
@@ -41,10 +41,6 @@ public:
                      GlobalVector const& x) override;
     void checkResidual(const GlobalVector& residual) override;
 
-    void preFirstIteration() override { _is_first_iteration = true; }
-    void reset() override { _satisfied = true; _is_first_iteration = false; }
-    bool isSatisfied() const override { return _satisfied; }
-
     void setDOFTable(const LocalToGlobalIndexMap& dof_table,
                      MeshLib::Mesh const& mesh) override;
 
@@ -52,10 +48,8 @@ private:
     const std::vector<double> _abstols;
     const std::vector<double> _reltols;
     const MathLib::VecNormType _norm_type;
-    bool _satisfied = true;
     LocalToGlobalIndexMap const* _dof_table = nullptr;
     MeshLib::Mesh const* _mesh = nullptr;
-    bool _is_first_iteration = true;
     std::vector<double> _residual_norms_0;
 };
 
diff --git a/NumLib/ODESolver/ConvergenceCriterionResidual.h b/NumLib/ODESolver/ConvergenceCriterionResidual.h
index 21b793f589d60964aacf32002c4ab1434fd74852..b598cfbc83a3920dcbcdf4df61ba4473b5d57b41 100644
--- a/NumLib/ODESolver/ConvergenceCriterionResidual.h
+++ b/NumLib/ODESolver/ConvergenceCriterionResidual.h
@@ -38,15 +38,10 @@ public:
                      GlobalVector const& x) override;
     void checkResidual(const GlobalVector& residual) override;
 
-    void preFirstIteration() override { _is_first_iteration = true; }
-    void reset() override { _satisfied = true; _is_first_iteration = false; }
-    bool isSatisfied() const override { return _satisfied; }
 private:
     const boost::optional<double> _abstol;
     const boost::optional<double> _reltol;
     const MathLib::VecNormType _norm_type;
-    bool _satisfied = true;
-    bool _is_first_iteration = true;
     double _residual_norm_0;
 };
 
diff --git a/NumLib/ODESolver/NonlinearSolver.cpp b/NumLib/ODESolver/NonlinearSolver.cpp
index bf55b1a5d8dc80957d6484ff80602b5900615b9c..fe2139c557c53100914c3439152c200fa9cb45a0 100644
--- a/NumLib/ODESolver/NonlinearSolver.cpp
+++ b/NumLib/ODESolver/NonlinearSolver.cpp
@@ -24,13 +24,14 @@
 namespace NumLib
 {
 void NonlinearSolver<NonlinearSolverTag::Picard>::assemble(
-    GlobalVector const& x) const
+    GlobalVector const& x,
+    ProcessLib::StaggeredCouplingTerm const& coupling_term) const
 {
-    _equation_system->assemble(x);
+    _equation_system->assemble(x, coupling_term);
 }
 
 bool NonlinearSolver<NonlinearSolverTag::Picard>::solve(
-    GlobalVector& x,
+    GlobalVector& x, ProcessLib::StaggeredCouplingTerm const& coupling_term,
     std::function<void(unsigned, GlobalVector const&)> const& postIterationCallback)
 {
     namespace LinAlg = MathLib::LinAlg;
@@ -61,7 +62,7 @@ bool NonlinearSolver<NonlinearSolverTag::Picard>::solve(
 
         BaseLib::RunTime time_assembly;
         time_assembly.start();
-        sys.assemble(x);
+        sys.assemble(x, coupling_term);
         sys.getA(A);
         sys.getRhs(rhs);
         INFO("[time] Assembly took %g s.", time_assembly.elapsed());
@@ -161,16 +162,17 @@ bool NonlinearSolver<NonlinearSolverTag::Picard>::solve(
 }
 
 void NonlinearSolver<NonlinearSolverTag::Newton>::assemble(
-    GlobalVector const& x) const
+    GlobalVector const& x,
+    ProcessLib::StaggeredCouplingTerm const& coupling_term) const
 {
-    _equation_system->assemble(x);
+    _equation_system->assemble(x, coupling_term);
     // TODO if the equation system would be reset to nullptr after each
     //      assemble() or solve() call, the user would be forced to set the
     //      equation every time and could not forget it.
 }
 
 bool NonlinearSolver<NonlinearSolverTag::Newton>::solve(
-    GlobalVector& x,
+    GlobalVector& x, ProcessLib::StaggeredCouplingTerm const& coupling_term,
     std::function<void(unsigned, GlobalVector const&)> const& postIterationCallback)
 {
     namespace LinAlg = MathLib::LinAlg;
@@ -204,7 +206,7 @@ bool NonlinearSolver<NonlinearSolverTag::Newton>::solve(
 
         BaseLib::RunTime time_assembly;
         time_assembly.start();
-        sys.assemble(x);
+        sys.assemble(x, coupling_term);
         sys.getResidual(x, res);
         sys.getJacobian(J);
         INFO("[time] Assembly took %g s.", time_assembly.elapsed());
diff --git a/NumLib/ODESolver/NonlinearSolver.h b/NumLib/ODESolver/NonlinearSolver.h
index c4a9be12b977a547f642c4f192078e1c73697c5d..76fe29f29df3dc9d4f074f9a7d1c98390edff3bb 100644
--- a/NumLib/ODESolver/NonlinearSolver.h
+++ b/NumLib/ODESolver/NonlinearSolver.h
@@ -22,6 +22,11 @@ namespace BaseLib
 class ConfigTree;
 }
 
+namespace ProcessLib
+{
+    struct StaggeredCouplingTerm;
+}
+
 // TODO Document in the ODE solver lib, which matrices and vectors that are
 // passed around as method arguments are guaranteed to be of the right size
 // (and zeroed out) and which are not.
@@ -40,19 +45,29 @@ public:
      * scheme. It is not used for the general solver steps; in those only the
      * solve() method is needed.
      *
-     * \param x   the state at which the equation system will be assembled.
+     * \param x             the state at which the equation system will be
+     *                      assembled.
+     * \param coupling_term  the coupled term including the reference of the
+     *                      coupled processes and solutions of the equations of
+     *                      the coupled processes.
      */
-    virtual void assemble(GlobalVector const& x) const = 0;
+    virtual void assemble(GlobalVector const& x,
+                          ProcessLib::StaggeredCouplingTerm const& coupling_term
+                         ) const = 0;
 
     /*! Assemble and solve the equation system.
      *
      * \param x   in: the initial guess, out: the solution.
+     * \param coupling_term  the coupled term including the reference of the
+     *                      coupled processes and solutions of the equations of
+     *                      the coupled processes.
      * \param postIterationCallback called after each iteration if set.
      *
      * \retval true if the equation system could be solved
      * \retval false otherwise
      */
     virtual bool solve(GlobalVector& x,
+                       ProcessLib::StaggeredCouplingTerm const& coupling_term,
                        std::function<void(unsigned, GlobalVector const&)> const&
                            postIterationCallback) = 0;
 
@@ -101,9 +116,13 @@ public:
         _equation_system = &eq;
         _convergence_criterion = &conv_crit;
     }
-    void assemble(GlobalVector const& x) const override;
+
+    void assemble(GlobalVector const& x,
+                  ProcessLib::StaggeredCouplingTerm const& coupling_term
+                 ) const override;
 
     bool solve(GlobalVector& x,
+               ProcessLib::StaggeredCouplingTerm const& coupling_term,
                std::function<void(unsigned, GlobalVector const&)> const&
                    postIterationCallback) override;
 
@@ -156,9 +175,13 @@ public:
         _equation_system = &eq;
         _convergence_criterion = &conv_crit;
     }
-    void assemble(GlobalVector const& x) const override;
+
+    void assemble(GlobalVector const& x,
+                  ProcessLib::StaggeredCouplingTerm const& coupling_term
+                 ) const override;
 
     bool solve(GlobalVector& x,
+               ProcessLib::StaggeredCouplingTerm const& coupling_term,
                std::function<void(unsigned, GlobalVector const&)> const&
                    postIterationCallback) override;
 
diff --git a/NumLib/ODESolver/NonlinearSystem.h b/NumLib/ODESolver/NonlinearSystem.h
index edfa53160ee62e5dcfc6ea7630cf06fe40813610..e7cffb1755c1357eb6cc3ab7f1dd26fe9eafcab4 100644
--- a/NumLib/ODESolver/NonlinearSystem.h
+++ b/NumLib/ODESolver/NonlinearSystem.h
@@ -12,6 +12,11 @@
 #include "EquationSystem.h"
 #include "Types.h"
 
+namespace ProcessLib
+{
+    struct StaggeredCouplingTerm;
+}
+
 namespace NumLib
 {
 //! \addtogroup ODESolver
@@ -36,7 +41,9 @@ public:
     //! Assembles the linearized equation system at the point \c x.
     //! The linearized system is \f$A(x) \cdot x = b(x)\f$. Here the matrix
     //! \f$A(x)\f$ and the vector \f$b(x)\f$ are assembled.
-    virtual void assemble(GlobalVector const& x) = 0;
+    virtual void assemble(GlobalVector const& x,
+                          ProcessLib::StaggeredCouplingTerm const& coupling_term
+                          ) = 0;
 
     /*! Writes the residual at point \c x to \c res.
      *
@@ -75,7 +82,9 @@ public:
     //! Assembles the linearized equation system at the point \c x.
     //! The linearized system is \f$J(x) \cdot \Delta x = (x)\f$. Here the
     //! residual vector \f$r(x)\f$ and its Jacobian \f$J(x)\f$ are assembled.
-    virtual void assemble(GlobalVector const& x) = 0;
+    virtual void assemble(GlobalVector const& x,
+                          ProcessLib::StaggeredCouplingTerm const& coupling_term
+                         ) = 0;
 
     //! Writes the linearized equation system matrix to \c A.
     //! \pre assemble() must have been called before.
diff --git a/NumLib/ODESolver/ODESystem.h b/NumLib/ODESolver/ODESystem.h
index aafe142c7a5fbc6caca1b9e48aa95ec45177e868..598252c594040e201769a159dfbdee5a20425306 100644
--- a/NumLib/ODESolver/ODESystem.h
+++ b/NumLib/ODESolver/ODESystem.h
@@ -15,6 +15,11 @@
 #include "EquationSystem.h"
 #include "Types.h"
 
+namespace ProcessLib
+{
+struct StaggeredCouplingTerm;
+}
+
 namespace NumLib
 {
 //! \addtogroup ODESolver
@@ -45,9 +50,10 @@ public:
         ODESystemTag::FirstOrderImplicitQuasilinear;
 
     //! Assemble \c M, \c K and \c b at the provided state (\c t, \c x).
-    virtual void assemble(const double t, GlobalVector const& x,
-                          GlobalMatrix& M, GlobalMatrix& K,
-                          GlobalVector& b) = 0;
+    virtual void assemble(
+        const double t, GlobalVector const& x, GlobalMatrix& M, GlobalMatrix& K,
+        GlobalVector& b,
+        ProcessLib::StaggeredCouplingTerm const& coupling_term) = 0;
 
     using Index = MathLib::MatrixVectorTraits<GlobalMatrix>::Index;
 
@@ -77,7 +83,8 @@ public:
      * \f$ \mathtt{Jac} := \partial r/\partial x_N \f$
      * at the provided state (\c t, \c x).
      *
-     * For the meaning of the other parameters refer to the the introductory remarks on
+     * For the meaning of the other parameters refer to the the introductory
+     * remarks on
      * \ref concept_time_discretization "time discretization".
      *
      * \remark
@@ -90,32 +97,37 @@ public:
      *  + \frac{\partial K}{\partial x_N} \cdot x_N
      *  + \frac{\partial b}{\partial x_N},
      *  \f]
-     * where \f$ M \f$, \f$ K \f$ and \f$ b \f$ are matrix-valued (vector-valued, respectively)
+     * where \f$ M \f$, \f$ K \f$ and \f$ b \f$ are matrix-valued
+     * (vector-valued, respectively)
      * functions that depend on \f$ x_C \f$ and \f$ t_C \f$.
      *
-     * Due to the arguments provided to this method its implementation only has to
+     * Due to the arguments provided to this method its implementation only has
+     * to
      * compute the derivatives
      * \f$ \frac{\partial M}{\partial x_N} \cdot \hat x \f$,
      * \f$ \frac{\partial K}{\partial x_N} \cdot x_N    \f$ and
      * \f$ \frac{\partial b}{\partial x_N} \f$.
      * The other terms can be readily taken from the method parameters.
      *
-     * In particular for the ForwardEuler time discretization scheme the equation will
+     * In particular for the ForwardEuler time discretization scheme the
+     * equation will
      * collapse to
      * \f$ \mathtt{Jac} =
      *  M \cdot \frac{\partial \hat x}{\partial x_N}
      *  \f$
      * since in that scheme \f$ x_N \neq x_C \f$.
      *
-     * Of course, the implementation of this method is allowed to compute the Jacobian in a
-     * different way, as long as that is consistent with the definition of \f$ \mathtt{Jac} \f$.
+     * Of course, the implementation of this method is allowed to compute the
+     * Jacobian in a
+     * different way, as long as that is consistent with the definition of \f$
+     * \mathtt{Jac} \f$.
      * \endparblock
      */
-    virtual void assembleWithJacobian(const double t, GlobalVector const& x,
-                                      GlobalVector const& xdot,
-                                      const double dxdot_dx, const double dx_dx,
-                                      GlobalMatrix& M, GlobalMatrix& K,
-                                      GlobalVector& b, GlobalMatrix& Jac) = 0;
+    virtual void assembleWithJacobian(
+        const double t, GlobalVector const& x, GlobalVector const& xdot,
+        const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        ProcessLib::StaggeredCouplingTerm const& coupling_term) = 0;
 };
 
 //! @}
diff --git a/NumLib/ODESolver/TimeDiscretizedODESystem.cpp b/NumLib/ODESolver/TimeDiscretizedODESystem.cpp
index a1b6f29685cc0820daf6011fb53fd7615a9f08ae..0684b2657f41d99bead8a27228e17e4b649256aa 100644
--- a/NumLib/ODESolver/TimeDiscretizedODESystem.cpp
+++ b/NumLib/ODESolver/TimeDiscretizedODESystem.cpp
@@ -67,7 +67,8 @@ TimeDiscretizedODESystem<
 
 void TimeDiscretizedODESystem<ODESystemTag::FirstOrderImplicitQuasilinear,
                               NonlinearSolverTag::Newton>::
-    assemble(const GlobalVector& x_new_timestep)
+    assemble(const GlobalVector& x_new_timestep,
+             ProcessLib::StaggeredCouplingTerm const& coupling_term)
 {
     namespace LinAlg = MathLib::LinAlg;
 
@@ -85,7 +86,7 @@ void TimeDiscretizedODESystem<ODESystemTag::FirstOrderImplicitQuasilinear,
     _Jac->setZero();
 
     _ode.assembleWithJacobian(t, x_curr, xdot, dxdot_dx, dx_dx, *_M, *_K, *_b,
-                              *_Jac);
+                              *_Jac, coupling_term);
 
     LinAlg::finalizeAssembly(*_M);
     LinAlg::finalizeAssembly(*_K);
@@ -175,7 +176,8 @@ TimeDiscretizedODESystem<
 
 void TimeDiscretizedODESystem<ODESystemTag::FirstOrderImplicitQuasilinear,
                               NonlinearSolverTag::Picard>::
-    assemble(const GlobalVector& x_new_timestep)
+    assemble(const GlobalVector& x_new_timestep,
+             ProcessLib::StaggeredCouplingTerm const& coupling_term)
 {
     namespace LinAlg = MathLib::LinAlg;
 
@@ -186,7 +188,7 @@ void TimeDiscretizedODESystem<ODESystemTag::FirstOrderImplicitQuasilinear,
     _K->setZero();
     _b->setZero();
 
-    _ode.assemble(t, x_curr, *_M, *_K, *_b);
+    _ode.assemble(t, x_curr, *_M, *_K, *_b, coupling_term);
 
     LinAlg::finalizeAssembly(*_M);
     LinAlg::finalizeAssembly(*_K);
diff --git a/NumLib/ODESolver/TimeDiscretizedODESystem.h b/NumLib/ODESolver/TimeDiscretizedODESystem.h
index 4a9856d08473d38f6118d82da0445116c95b4e76..4aa4fcb345c38dd4f268cf7dc2510c540f31d207 100644
--- a/NumLib/ODESolver/TimeDiscretizedODESystem.h
+++ b/NumLib/ODESolver/TimeDiscretizedODESystem.h
@@ -16,6 +16,12 @@
 #include "ODESystem.h"
 #include "TimeDiscretization.h"
 
+
+namespace ProcessLib
+{
+    struct StaggeredCouplingTerm;
+}
+
 namespace NumLib
 {
 //! \addtogroup ODESolver
@@ -80,7 +86,9 @@ public:
 
     ~TimeDiscretizedODESystem();
 
-    void assemble(const GlobalVector& x_new_timestep) override;
+    void assemble(const GlobalVector& x_new_timestep,
+                  ProcessLib::StaggeredCouplingTerm const& coupling_term)
+                  override;
 
     void getResidual(GlobalVector const& x_new_timestep,
                      GlobalVector& res) const override;
@@ -172,7 +180,9 @@ public:
 
     ~TimeDiscretizedODESystem();
 
-    void assemble(const GlobalVector& x_new_timestep) override;
+    void assemble(const GlobalVector& x_new_timestep,
+                  ProcessLib::StaggeredCouplingTerm const& coupling_term)
+                  override;
 
     void getA(GlobalMatrix& A) const override
     {
diff --git a/ProcessLib/AbstractJacobianAssembler.h b/ProcessLib/AbstractJacobianAssembler.h
index 7989d383b16673deb59c37180702ddc72422efd2..4505fc8c6f95e6ab94892d853cbf14c35e0c262b 100644
--- a/ProcessLib/AbstractJacobianAssembler.h
+++ b/ProcessLib/AbstractJacobianAssembler.h
@@ -14,6 +14,7 @@
 namespace ProcessLib
 {
 class LocalAssemblerInterface;
+struct LocalCouplingTerm;
 
 //! Base class for Jacobian assemblers.
 class AbstractJacobianAssembler
@@ -29,6 +30,18 @@ public:
         std::vector<double>& local_K_data, std::vector<double>& local_b_data,
         std::vector<double>& local_Jac_data) = 0;
 
+    //! Assembles the Jacobian, the matrices \f$M\f$ and \f$K\f$, and the vector
+    //! \f$b\f$ with coupling.
+    virtual void assembleWithJacobianAndCouping(
+        LocalAssemblerInterface& /*local_assembler*/, double const /*t*/,
+        std::vector<double> const& /*local_x*/,
+        std::vector<double> const& /*local_xdot*/, const double /*dxdot_dx*/,
+        const double /*dx_dx*/, std::vector<double>& /*local_M_data*/,
+        std::vector<double>& /*local_K_data*/,
+        std::vector<double>& /*local_b_data*/,
+        std::vector<double>& /*local_Jac_data*/,
+        LocalCouplingTerm const& /*coupling_term*/) {}
+
     virtual ~AbstractJacobianAssembler() = default;
 };
 
diff --git a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.cpp b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.cpp
index 42367c1b05eb6268d97259cd46d9cb15da9bc345..68a5c72a3755888d275432349e62fd8c244cc388 100644
--- a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.cpp
+++ b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.cpp
@@ -76,20 +76,23 @@ void GroundwaterFlowProcess::assembleConcreteProcess(const double t,
                                                      GlobalVector const& x,
                                                      GlobalMatrix& M,
                                                      GlobalMatrix& K,
-                                                     GlobalVector& b)
+                                                     GlobalVector& b,
+                                                     StaggeredCouplingTerm
+                                                     const& coupling_term)
 {
     DBUG("Assemble GroundwaterFlowProcess.");
 
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers,
-        *_local_to_global_index_map, t, x, M, K, b);
+        *_local_to_global_index_map, t, x, M, K, b, coupling_term);
 }
 
 void GroundwaterFlowProcess::assembleWithJacobianConcreteProcess(
     const double t, GlobalVector const& x, GlobalVector const& xdot,
     const double dxdot_dx, const double dx_dx, GlobalMatrix& M, GlobalMatrix& K,
-    GlobalVector& b, GlobalMatrix& Jac)
+    GlobalVector& b, GlobalMatrix& Jac,
+    StaggeredCouplingTerm const& coupling_term)
 {
     DBUG("AssembleWithJacobian GroundwaterFlowProcess.");
 
@@ -97,7 +100,7 @@ void GroundwaterFlowProcess::assembleWithJacobianConcreteProcess(
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
         _local_assemblers, *_local_to_global_index_map, t, x, xdot, dxdot_dx,
-        dx_dx, M, K, b, Jac);
+        dx_dx, M, K, b, Jac, coupling_term);
 }
 
 
diff --git a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h
index f609740a86325e7ffc94238639559df3cbce148c..694a84aa0fd046c79ba7eb0d22209fe13b620e4c 100644
--- a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h
+++ b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h
@@ -98,12 +98,15 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override;
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const& coupling_term
+                                ) override;
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override;
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override;
 
     GroundwaterFlowProcessData _process_data;
 
diff --git a/ProcessLib/HT/HTProcess.cpp b/ProcessLib/HT/HTProcess.cpp
index 2ce02f4b28c1ee5b73199b31c2afa86053e5ce08..3cebe46de0224d92e778695a0e246a68b3e030fc 100644
--- a/ProcessLib/HT/HTProcess.cpp
+++ b/ProcessLib/HT/HTProcess.cpp
@@ -71,20 +71,23 @@ void HTProcess::assembleConcreteProcess(const double t,
                                         GlobalVector const& x,
                                         GlobalMatrix& M,
                                         GlobalMatrix& K,
-                                        GlobalVector& b)
+                                        GlobalVector& b,
+                                        StaggeredCouplingTerm const&
+                                        coupling_term)
 {
     DBUG("Assemble HTProcess.");
 
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers,
-        *_local_to_global_index_map, t, x, M, K, b);
+        *_local_to_global_index_map, t, x, M, K, b, coupling_term);
 }
 
 void HTProcess::assembleWithJacobianConcreteProcess(
     const double t, GlobalVector const& x, GlobalVector const& xdot,
     const double dxdot_dx, const double dx_dx, GlobalMatrix& M, GlobalMatrix& K,
-    GlobalVector& b, GlobalMatrix& Jac)
+    GlobalVector& b, GlobalMatrix& Jac,
+    StaggeredCouplingTerm const& coupling_term)
 {
     DBUG("AssembleWithJacobian HTProcess.");
 
@@ -92,7 +95,7 @@ void HTProcess::assembleWithJacobianConcreteProcess(
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
         _local_assemblers, *_local_to_global_index_map, t, x, xdot, dxdot_dx,
-        dx_dx, M, K, b, Jac);
+        dx_dx, M, K, b, Jac, coupling_term);
 }
 
 }  // namespace HT
diff --git a/ProcessLib/HT/HTProcess.h b/ProcessLib/HT/HTProcess.h
index 355dba18b5b6247ed94fac2eb8ba09fd0450502b..a5048e295cd1daaa2f4507d3d955a06bb0a8d8d6 100644
--- a/ProcessLib/HT/HTProcess.h
+++ b/ProcessLib/HT/HTProcess.h
@@ -65,12 +65,15 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override;
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const& coupling_term
+                                ) override;
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override;
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override;
 
     HTProcessData _process_data;
 
diff --git a/ProcessLib/HeatConduction/HeatConductionProcess.cpp b/ProcessLib/HeatConduction/HeatConductionProcess.cpp
index b82273f8b6f06d6c31e6048d00bba724320e7119..1f3782e6aa5ffa629dd229f5bbe66f1d1af3be18 100644
--- a/ProcessLib/HeatConduction/HeatConductionProcess.cpp
+++ b/ProcessLib/HeatConduction/HeatConductionProcess.cpp
@@ -72,20 +72,23 @@ void HeatConductionProcess::assembleConcreteProcess(const double t,
                                                     GlobalVector const& x,
                                                     GlobalMatrix& M,
                                                     GlobalMatrix& K,
-                                                    GlobalVector& b)
+                                                    GlobalVector& b,
+                                                    StaggeredCouplingTerm
+                                                    const& coupling_term)
 {
     DBUG("Assemble HeatConductionProcess.");
 
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers,
-        *_local_to_global_index_map, t, x, M, K, b);
+        *_local_to_global_index_map, t, x, M, K, b, coupling_term);
 }
 
 void HeatConductionProcess::assembleWithJacobianConcreteProcess(
     const double t, GlobalVector const& x, GlobalVector const& xdot,
     const double dxdot_dx, const double dx_dx, GlobalMatrix& M, GlobalMatrix& K,
-    GlobalVector& b, GlobalMatrix& Jac)
+    GlobalVector& b, GlobalMatrix& Jac,
+    StaggeredCouplingTerm const& coupling_term)
 {
     DBUG("AssembleWithJacobian HeatConductionProcess.");
 
@@ -93,7 +96,7 @@ void HeatConductionProcess::assembleWithJacobianConcreteProcess(
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
         _local_assemblers, *_local_to_global_index_map, t, x, xdot, dxdot_dx,
-        dx_dx, M, K, b, Jac);
+        dx_dx, M, K, b, Jac, coupling_term);
 }
 
 void HeatConductionProcess::computeSecondaryVariableConcrete(const double t,
diff --git a/ProcessLib/HeatConduction/HeatConductionProcess.h b/ProcessLib/HeatConduction/HeatConductionProcess.h
index dd2c340e1b09560379d108e417a95a7cb1399426..c6eaf6b103ae2c926d1b0f258ca772399e365cab 100644
--- a/ProcessLib/HeatConduction/HeatConductionProcess.h
+++ b/ProcessLib/HeatConduction/HeatConductionProcess.h
@@ -49,12 +49,15 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override;
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const& coupling_term
+                                ) override;
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override;
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override;
 
     HeatConductionProcessData _process_data;
 
diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h
index 3087dfb382a677c1f10d654376fb1bf5166b3b04..69295332b5d9064806d41ff713248f959069172e 100644
--- a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h
+++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h
@@ -106,20 +106,24 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const&
+                                 coupling_term) override
     {
         DBUG("Assemble HydroMechanicsProcess.");
 
         // Call global assembler for each local assembly item.
         GlobalExecutor::executeMemberDereferenced(
             _global_assembler, &VectorMatrixAssembler::assemble,
-            _local_assemblers, *_local_to_global_index_map, t, x, M, K, b);
+            _local_assemblers, *_local_to_global_index_map, t, x, M, K, b,
+            coupling_term);
     }
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override
     {
         DBUG("AssembleJacobian HydroMechanicsProcess.");
 
@@ -127,7 +131,7 @@ private:
         GlobalExecutor::executeMemberDereferenced(
             _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
             _local_assemblers, *_local_to_global_index_map, t, x, xdot,
-            dxdot_dx, dx_dx, M, K, b, Jac);
+            dxdot_dx, dx_dx, M, K, b, Jac, coupling_term);
     }
 
     void preTimestepConcreteProcess(GlobalVector const& x, double const t,
diff --git a/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.h b/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.h
index 1c21e10699ab5aa3cd1c51c2aaf6236651818e11..92a2f3043b81b855c3011e2a9e237f7052b25090 100644
--- a/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.h
+++ b/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.h
@@ -67,20 +67,24 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const&
+                                 coupling_term) override
     {
         DBUG("Assemble HydroMechanicsProcess.");
 
         // Call global assembler for each local assembly item.
         GlobalExecutor::executeMemberDereferenced(
             _global_assembler, &VectorMatrixAssembler::assemble,
-            _local_assemblers, *_local_to_global_index_map, t, x, M, K, b);
+            _local_assemblers, *_local_to_global_index_map, t, x, M, K, b,
+            coupling_term);
     }
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override
     {
         DBUG("AssembleWithJacobian HydroMechanicsProcess.");
 
@@ -88,7 +92,7 @@ private:
         GlobalExecutor::executeMemberDereferenced(
             _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
             _local_assemblers, *_local_to_global_index_map, t, x, xdot,
-            dxdot_dx, dx_dx, M, K, b, Jac);
+            dxdot_dx, dx_dx, M, K, b, Jac, coupling_term);
     }
 
     void preTimestepConcreteProcess(GlobalVector const& x, double const t,
diff --git a/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.h b/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.h
index 3a0e40d93b5f460b4d007ad2f32c2eb311ac3f5c..644388bb7a267d76ff5c8cd6be38a73b0afcc33b 100644
--- a/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.h
+++ b/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.h
@@ -65,20 +65,24 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const&
+                                 coupling_term) override
     {
         DBUG("Assemble SmallDeformationProcess.");
 
         // Call global assembler for each local assembly item.
         GlobalExecutor::executeMemberDereferenced(
             _global_assembler, &VectorMatrixAssembler::assemble,
-            _local_assemblers, *_local_to_global_index_map, t, x, M, K, b);
+            _local_assemblers, *_local_to_global_index_map, t, x, M, K, b,
+            coupling_term);
     }
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override
     {
         DBUG("AssembleWithJacobian SmallDeformationProcess.");
 
@@ -86,7 +90,7 @@ private:
         GlobalExecutor::executeMemberDereferenced(
             _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
             _local_assemblers, *_local_to_global_index_map, t, x, xdot,
-            dxdot_dx, dx_dx, M, K, b, Jac);
+            dxdot_dx, dx_dx, M, K, b, Jac, coupling_term);
     }
 
     void preTimestepConcreteProcess(GlobalVector const& x, double const t,
diff --git a/ProcessLib/LiquidFlow/LiquidFlowProcess.cpp b/ProcessLib/LiquidFlow/LiquidFlowProcess.cpp
index 1d9d9389dbc718e5f69603708d74015db923ffef..6897c23707f3bfdffc5f72002bc5a94cc6a5b6b9 100644
--- a/ProcessLib/LiquidFlow/LiquidFlowProcess.cpp
+++ b/ProcessLib/LiquidFlow/LiquidFlowProcess.cpp
@@ -92,19 +92,22 @@ void LiquidFlowProcess::assembleConcreteProcess(const double t,
                                                 GlobalVector const& x,
                                                 GlobalMatrix& M,
                                                 GlobalMatrix& K,
-                                                GlobalVector& b)
+                                                GlobalVector& b,
+                                                StaggeredCouplingTerm const&
+                                                coupling_term)
 {
     DBUG("Assemble LiquidFlowProcess.");
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers,
-        *_local_to_global_index_map, t, x, M, K, b);
+        *_local_to_global_index_map, t, x, M, K, b, coupling_term);
 }
 
 void LiquidFlowProcess::assembleWithJacobianConcreteProcess(
     const double t, GlobalVector const& x, GlobalVector const& xdot,
     const double dxdot_dx, const double dx_dx, GlobalMatrix& M, GlobalMatrix& K,
-    GlobalVector& b, GlobalMatrix& Jac)
+    GlobalVector& b, GlobalMatrix& Jac,
+    StaggeredCouplingTerm const& coupling_term)
 {
     DBUG("AssembleWithJacobian LiquidFlowProcess.");
 
@@ -112,7 +115,7 @@ void LiquidFlowProcess::assembleWithJacobianConcreteProcess(
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
         _local_assemblers, *_local_to_global_index_map, t, x, xdot, dxdot_dx,
-        dx_dx, M, K, b, Jac);
+        dx_dx, M, K, b, Jac, coupling_term);
 }
 
 void LiquidFlowProcess::computeSecondaryVariableConcrete(const double t,
diff --git a/ProcessLib/LiquidFlow/LiquidFlowProcess.h b/ProcessLib/LiquidFlow/LiquidFlowProcess.h
index 75228ce8cc0d70fd854b222f64568a7fd50ab144..aae25d9108dd6ec096605e96a1442231ac5a117d 100644
--- a/ProcessLib/LiquidFlow/LiquidFlowProcess.h
+++ b/ProcessLib/LiquidFlow/LiquidFlowProcess.h
@@ -76,6 +76,7 @@ public:
                                           GlobalVector const& x) override;
 
     bool isLinear() const override { return true; }
+
 private:
     void initializeConcreteProcess(
         NumLib::LocalToGlobalIndexMap const& dof_table,
@@ -83,12 +84,15 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override;
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const& coupling_term
+                                ) override;
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override;
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override;
 
     const int _gravitational_axis_id;
     const double _gravitational_acceleration;
diff --git a/ProcessLib/LocalAssemblerInterface.cpp b/ProcessLib/LocalAssemblerInterface.cpp
index 508f73b6bfad4ed17d8c9981cf90738e2862e7bc..8eb8198e77bd250264e3fe65586125c4476298db 100644
--- a/ProcessLib/LocalAssemblerInterface.cpp
+++ b/ProcessLib/LocalAssemblerInterface.cpp
@@ -13,6 +13,19 @@
 
 namespace ProcessLib
 {
+
+void LocalAssemblerInterface::coupling_assemble(
+    double const /*t*/, std::vector<double> const& /*local_x*/,
+    std::vector<double>& /*local_M_data*/,
+    std::vector<double>& /*local_K_data*/,
+    std::vector<double>& /*local_b_data*/,
+    LocalCouplingTerm const& /*coupling_term*/)
+{
+    OGS_FATAL(
+        "The coupling_assemble() function is not implemented in the local "
+        "assembler.");
+}
+
 void LocalAssemblerInterface::assembleWithJacobian(
     double const /*t*/, std::vector<double> const& /*local_x*/,
     std::vector<double> const& /*local_xdot*/, const double /*dxdot_dx*/,
@@ -26,6 +39,20 @@ void LocalAssemblerInterface::assembleWithJacobian(
         "assembler.");
 }
 
+void LocalAssemblerInterface::assembleWithJacobianAndCouping(
+    double const /*t*/, std::vector<double> const& /*local_x*/,
+    std::vector<double> const& /*local_xdot*/, const double /*dxdot_dx*/,
+    const double /*dx_dx*/, std::vector<double>& /*local_M_data*/,
+    std::vector<double>& /*local_K_data*/,
+    std::vector<double>& /*local_b_data*/,
+    std::vector<double>& /*local_Jac_data*/,
+    LocalCouplingTerm const& /*coupling_term*/)
+{
+    OGS_FATAL(
+        "The assembleWithJacobianAndCouping() function is not implemented in"
+        " the local assembler.");
+}
+
 void LocalAssemblerInterface::computeSecondaryVariable(
                               std::size_t const mesh_item_id,
                               NumLib::LocalToGlobalIndexMap const& dof_table,
diff --git a/ProcessLib/LocalAssemblerInterface.h b/ProcessLib/LocalAssemblerInterface.h
index 8176485f679d31959f251bc8d869ac0691917ed9..a70871b4b337a1b1f2520c1e958353fd11defbbb 100644
--- a/ProcessLib/LocalAssemblerInterface.h
+++ b/ProcessLib/LocalAssemblerInterface.h
@@ -11,6 +11,7 @@
 
 #include "NumLib/NumericsConfig.h"
 #include "MathLib/Point3d.h"
+#include "StaggeredCouplingTerm.h"
 
 namespace NumLib
 {
@@ -35,6 +36,13 @@ public:
         std::vector<double>& local_M_data, std::vector<double>& local_K_data,
         std::vector<double>& local_b_data) = 0;
 
+    virtual void coupling_assemble(double const t,
+                                   std::vector<double> const& local_x,
+                                   std::vector<double>& local_M_data,
+                                   std::vector<double>& local_K_data,
+                                   std::vector<double>& local_b_data,
+                                   LocalCouplingTerm const& coupling_term);
+
     virtual void assembleWithJacobian(double const t,
                                       std::vector<double> const& local_x,
                                       std::vector<double> const& local_xdot,
@@ -44,6 +52,16 @@ public:
                                       std::vector<double>& local_b_data,
                                       std::vector<double>& local_Jac_data);
 
+    virtual void assembleWithJacobianAndCouping(double const t,
+                                      std::vector<double> const& local_x,
+                                      std::vector<double> const& local_xdot,
+                                      const double dxdot_dx, const double dx_dx,
+                                      std::vector<double>& local_M_data,
+                                      std::vector<double>& local_K_data,
+                                      std::vector<double>& local_b_data,
+                                      std::vector<double>& local_Jac_data,
+                                      LocalCouplingTerm const& coupling_term);
+
     virtual void computeSecondaryVariable(std::size_t const mesh_item_id,
                               NumLib::LocalToGlobalIndexMap const& dof_table,
                               const double t, GlobalVector const& x);
diff --git a/ProcessLib/Process.cpp b/ProcessLib/Process.cpp
index 82e75ec037bc5cd0ce317ca748d138dcbd8c0b86..addff2becea498ae7c4462e3a35fdd2db671c140 100644
--- a/ProcessLib/Process.cpp
+++ b/ProcessLib/Process.cpp
@@ -119,9 +119,11 @@ MathLib::MatrixSpecifications Process::getMatrixSpecifications() const
 }
 
 void Process::assemble(const double t, GlobalVector const& x, GlobalMatrix& M,
-                       GlobalMatrix& K, GlobalVector& b)
+                       GlobalMatrix& K, GlobalVector& b,
+                       StaggeredCouplingTerm const& coupling_term)
 {
-    assembleConcreteProcess(t, x, M, K, b);
+    assembleConcreteProcess(t, x, M, K, b, coupling_term);
+
     _boundary_conditions.applyNaturalBC(t, x, K, b);
 }
 
@@ -129,9 +131,11 @@ void Process::assembleWithJacobian(const double t, GlobalVector const& x,
                                    GlobalVector const& xdot,
                                    const double dxdot_dx, const double dx_dx,
                                    GlobalMatrix& M, GlobalMatrix& K,
-                                   GlobalVector& b, GlobalMatrix& Jac)
+                                   GlobalVector& b, GlobalMatrix& Jac,
+                                   StaggeredCouplingTerm const& coupling_term)
 {
-    assembleWithJacobianConcreteProcess(t, x, xdot, dxdot_dx, dx_dx, M, K, b, Jac);
+    assembleWithJacobianConcreteProcess(t, x, xdot, dxdot_dx, dx_dx, M, K, b,
+                                        Jac, coupling_term);
 
     // TODO apply BCs to Jacobian.
     _boundary_conditions.applyNaturalBC(t, x, K, b);
diff --git a/ProcessLib/Process.h b/ProcessLib/Process.h
index fcd4b82ec029f6b475d0f27ef04fd90680f8e9cb..7bec88b578565d61859b8b79acb1fb12bbe956b8 100644
--- a/ProcessLib/Process.h
+++ b/ProcessLib/Process.h
@@ -21,6 +21,7 @@
 #include "SecondaryVariable.h"
 #include "CachedSecondaryVariable.h"
 #include "AbstractJacobianAssembler.h"
+#include "StaggeredCouplingTerm.h"
 #include "VectorMatrixAssembler.h"
 
 namespace MeshLib
@@ -71,13 +72,17 @@ public:
         const override final;
 
     void assemble(const double t, GlobalVector const& x, GlobalMatrix& M,
-                  GlobalMatrix& K, GlobalVector& b) override final;
+                  GlobalMatrix& K, GlobalVector& b,
+                  StaggeredCouplingTerm const& coupling_term)
+                  override final;
 
     void assembleWithJacobian(const double t, GlobalVector const& x,
                               GlobalVector const& xdot, const double dxdot_dx,
                               const double dx_dx, GlobalMatrix& M,
                               GlobalMatrix& K, GlobalVector& b,
-                              GlobalMatrix& Jac) override final;
+                              GlobalMatrix& Jac,
+                              StaggeredCouplingTerm const& coupling_term)
+                              override final;
 
     std::vector<NumLib::IndexValueVector<GlobalIndexType>> const*
     getKnownSolutions(double const t) const override final
@@ -103,6 +108,12 @@ public:
         return _secondary_variables;
     }
 
+    // Get the solution of the previous time step.
+    virtual GlobalVector* getPreviousTimeStepSolution() const
+    {
+       return nullptr;
+    }
+
     // Used as a call back for CalculateSurfaceFlux process.
     virtual std::vector<double> getFlux(std::size_t /*element_id*/,
                                         MathLib::Point3d const& /*p*/,
@@ -131,13 +142,16 @@ private:
 
     virtual void assembleConcreteProcess(const double t, GlobalVector const& x,
                                          GlobalMatrix& M, GlobalMatrix& K,
-                                         GlobalVector& b) = 0;
+                                         GlobalVector& b,
+                                         StaggeredCouplingTerm const&
+                                         coupling_term) = 0;
 
     virtual void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x,
         GlobalVector const& xdot, const double dxdot_dx,
         const double dx_dx, GlobalMatrix& M, GlobalMatrix& K,
-        GlobalVector& b, GlobalMatrix& Jac) = 0;
+        GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) = 0;
 
     virtual void preTimestepConcreteProcess(GlobalVector const& /*x*/,
                                             const double /*t*/,
diff --git a/ProcessLib/RichardsFlow/RichardsFlowProcess.cpp b/ProcessLib/RichardsFlow/RichardsFlowProcess.cpp
index 0838ccf7d6e1d8e2123f465a87c416a9762b3fa5..72ccaf7d4ec1861517c19b9d95aecc25f57e7bbe 100644
--- a/ProcessLib/RichardsFlow/RichardsFlowProcess.cpp
+++ b/ProcessLib/RichardsFlow/RichardsFlowProcess.cpp
@@ -55,20 +55,23 @@ void RichardsFlowProcess::assembleConcreteProcess(const double t,
                                                   GlobalVector const& x,
                                                   GlobalMatrix& M,
                                                   GlobalMatrix& K,
-                                                  GlobalVector& b)
+                                                  GlobalVector& b,
+                                                  StaggeredCouplingTerm const&
+                                                  coupling_term)
 {
     DBUG("Assemble RichardsFlowProcess.");
 
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers,
-        *_local_to_global_index_map, t, x, M, K, b);
+        *_local_to_global_index_map, t, x, M, K, b, coupling_term);
 }
 
 void RichardsFlowProcess::assembleWithJacobianConcreteProcess(
     const double t, GlobalVector const& x, GlobalVector const& xdot,
     const double dxdot_dx, const double dx_dx, GlobalMatrix& M, GlobalMatrix& K,
-    GlobalVector& b, GlobalMatrix& Jac)
+    GlobalVector& b, GlobalMatrix& Jac,
+    StaggeredCouplingTerm const& coupling_term)
 {
     DBUG("AssembleWithJacobian RichardsFlowProcess.");
 
@@ -76,7 +79,7 @@ void RichardsFlowProcess::assembleWithJacobianConcreteProcess(
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
         _local_assemblers, *_local_to_global_index_map, t, x, xdot, dxdot_dx,
-        dx_dx, M, K, b, Jac);
+        dx_dx, M, K, b, Jac, coupling_term);
 }
 
 }  // namespace RichardsFlow
diff --git a/ProcessLib/RichardsFlow/RichardsFlowProcess.h b/ProcessLib/RichardsFlow/RichardsFlowProcess.h
index 954ba7af7b8640a461a9314b2251d04ef13679e1..99a88b7806c7973de1de85be81b7efa41b84186e 100644
--- a/ProcessLib/RichardsFlow/RichardsFlowProcess.h
+++ b/ProcessLib/RichardsFlow/RichardsFlowProcess.h
@@ -49,12 +49,15 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override;
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const& coupling_term
+                                ) override;
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override;
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override;
 
     RichardsFlowProcessData _process_data;
 
diff --git a/ProcessLib/SmallDeformation/SmallDeformationProcess.h b/ProcessLib/SmallDeformation/SmallDeformationProcess.h
index a5c92921db6d0767374268609ddcbd4d42b98557..cdf235b7c446d904044cae813d1567b9808b9975 100644
--- a/ProcessLib/SmallDeformation/SmallDeformationProcess.h
+++ b/ProcessLib/SmallDeformation/SmallDeformationProcess.h
@@ -143,20 +143,24 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const&
+                                 coupling_term) override
     {
         DBUG("Assemble SmallDeformationProcess.");
 
         // Call global assembler for each local assembly item.
         GlobalExecutor::executeMemberDereferenced(
             _global_assembler, &VectorMatrixAssembler::assemble,
-            _local_assemblers, *_local_to_global_index_map, t, x, M, K, b);
+            _local_assemblers, *_local_to_global_index_map, t, x, M, K, b,
+            coupling_term);
     }
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override
     {
         DBUG("AssembleWithJacobian SmallDeformationProcess.");
 
@@ -164,7 +168,7 @@ private:
         GlobalExecutor::executeMemberDereferenced(
             _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
             _local_assemblers, *_local_to_global_index_map, t, x, xdot,
-            dxdot_dx, dx_dx, M, K, b, Jac);
+            dxdot_dx, dx_dx, M, K, b, Jac, coupling_term);
     }
 
     void preTimestepConcreteProcess(GlobalVector const& x, double const t,
diff --git a/ProcessLib/StaggeredCouplingTerm.cpp b/ProcessLib/StaggeredCouplingTerm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a6468487e85ee94465d003347c91cd093ed460cc
--- /dev/null
+++ b/ProcessLib/StaggeredCouplingTerm.cpp
@@ -0,0 +1,26 @@
+/**
+ * \copyright
+ * Copyright (c) 2012-2017, OpenGeoSys Community (http://www.opengeosys.org)
+ *            Distributed under a Modified BSD License.
+ *              See accompanying file LICENSE.txt or
+ *              http://www.opengeosys.org/project/license
+ *
+ * \file   StaggeredCouplingTerm.cpp
+ *
+ * Created on November 7, 2016, 12:14 PM
+ */
+
+#include "StaggeredCouplingTerm.h"
+#include "Process.h"
+
+namespace ProcessLib
+{
+const StaggeredCouplingTerm createVoidStaggeredCouplingTerm()
+{
+    std::unordered_map<std::type_index, Process const&> coupled_processes;
+    std::unordered_map<std::type_index, GlobalVector const&> coupled_xs;
+    const bool empty = true;
+    return StaggeredCouplingTerm(coupled_processes, coupled_xs, 0.0, empty);
+}
+
+}  // end of ProcessLib
diff --git a/ProcessLib/StaggeredCouplingTerm.h b/ProcessLib/StaggeredCouplingTerm.h
new file mode 100644
index 0000000000000000000000000000000000000000..8089d0a07e0103b14d0b562544f46815fbb6992a
--- /dev/null
+++ b/ProcessLib/StaggeredCouplingTerm.h
@@ -0,0 +1,107 @@
+/**
+ * \copyright
+ * Copyright (c) 2012-2017, OpenGeoSys Community (http://www.opengeosys.org)
+ *            Distributed under a Modified BSD License.
+ *              See accompanying file LICENSE.txt or
+ *              http://www.opengeosys.org/project/license
+ *
+ * \file   StaggeredCouplingTerm.h
+ *
+ * Created on November 7, 2016, 12:14 PM
+ */
+
+#pragma once
+
+#include <unordered_map>
+#include <typeindex>
+
+#include "MathLib/LinAlg/GlobalMatrixVectorTypes.h"
+
+namespace ProcessLib
+{
+class Process;
+
+/**
+ *  A struct to keep the references of the coupled processes and the references
+ *  of the current solutions of the equations of the coupled processes.
+ *
+ *  During staggered coupling iteration, an instance of this struct is created
+ *  and passed through interfaces to global and local assemblers for each
+ *  process.
+ */
+struct StaggeredCouplingTerm
+{
+    StaggeredCouplingTerm(
+        std::unordered_map<std::type_index, Process const&> const&
+            coupled_processes_,
+        std::unordered_map<std::type_index, GlobalVector const&> const&
+            coupled_xs_,
+        const double dt_, const bool empty_ = false)
+        : coupled_processes(coupled_processes_),
+          coupled_xs(coupled_xs_),
+          dt(dt_),
+          empty(empty_)
+    {
+    }
+
+    /// References to the coupled processes are distinguished by the keys of
+    /// process types.
+    std::unordered_map<std::type_index, Process const&> const&
+        coupled_processes;
+
+    /// References to the current solutions of the coupled processes.
+    /// The coupled solutions are distinguished by the keys of process types.
+    std::unordered_map<std::type_index, GlobalVector const&> const& coupled_xs;
+
+    const double dt;   ///< Time step size.
+    const bool empty;  ///< Flag to indicate whether the couping term is empty.
+};
+
+/**
+ *  A struct to keep the references of the coupled processes, and local element
+ *  solutions  of the current and previous time step solutions of the equations
+ *  of the coupled processes.
+ *
+ *  During the global assembly loop, an instance of this struct is created for
+ *  each element and it is then passed to local assemblers.
+ */
+struct LocalCouplingTerm
+{
+    LocalCouplingTerm(
+        const double dt_,
+        std::unordered_map<std::type_index, Process const&> const&
+            coupled_processes_,
+        std::unordered_map<std::type_index, const std::vector<double>>&&
+            local_coupled_xs0_,
+        std::unordered_map<std::type_index, const std::vector<double>>&&
+            local_coupled_xs_)
+        : dt(dt_),
+          coupled_processes(coupled_processes_),
+          local_coupled_xs0(std::move(local_coupled_xs0_)),
+          local_coupled_xs(std::move(local_coupled_xs_))
+    {
+    }
+
+    const double dt;  ///< Time step size.
+
+    /// References to the coupled processes are distinguished by the keys of
+    /// process types.
+    std::unordered_map<std::type_index, Process const&> const&
+        coupled_processes;
+
+    /// Local solutions of the previous time step.
+    std::unordered_map<std::type_index, const std::vector<double>> const
+        local_coupled_xs0;
+    /// Local solutions of the current time step.
+    std::unordered_map<std::type_index, const std::vector<double>> const
+        local_coupled_xs;
+};
+
+/**
+ *  A function to create a void instance of StaggeredCouplingTerm. The void
+ *  instance is for the StaggeredCouplingTerm reference type of argument
+ *  function or class member to indicate no coupling.
+ */
+const StaggeredCouplingTerm createVoidStaggeredCouplingTerm();
+
+}  // end of ProcessLib
diff --git a/ProcessLib/TES/TESProcess.cpp b/ProcessLib/TES/TESProcess.cpp
index bad86ba65553db5d0867fb1798ce3a3397ce370b..9ccb2d4f15ffdb281dca69e7f33aefec73aac94e 100644
--- a/ProcessLib/TES/TESProcess.cpp
+++ b/ProcessLib/TES/TESProcess.cpp
@@ -226,25 +226,28 @@ void TESProcess::assembleConcreteProcess(const double t,
                                          GlobalVector const& x,
                                          GlobalMatrix& M,
                                          GlobalMatrix& K,
-                                         GlobalVector& b)
+                                         GlobalVector& b,
+                                         StaggeredCouplingTerm
+                                         const& coupling_term)
 {
     DBUG("Assemble TESProcess.");
 
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers,
-        *_local_to_global_index_map, t, x, M, K, b);
+        *_local_to_global_index_map, t, x, M, K, b, coupling_term);
 }
 
 void TESProcess::assembleWithJacobianConcreteProcess(
     const double t, GlobalVector const& x, GlobalVector const& xdot,
     const double dxdot_dx, const double dx_dx, GlobalMatrix& M, GlobalMatrix& K,
-    GlobalVector& b, GlobalMatrix& Jac)
+    GlobalVector& b, GlobalMatrix& Jac,
+    StaggeredCouplingTerm const& coupling_term)
 {
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
         _local_assemblers, *_local_to_global_index_map, t, x, xdot, dxdot_dx,
-        dx_dx, M, K, b, Jac);
+        dx_dx, M, K, b, Jac, coupling_term);
 }
 
 void TESProcess::preTimestepConcreteProcess(GlobalVector const& x,
diff --git a/ProcessLib/TES/TESProcess.h b/ProcessLib/TES/TESProcess.h
index 34073e1a02526ecbe46e2337ce7f248f7323a5b2..f75afb5b181adae3720af453c4eccd5df3f5f1d4 100644
--- a/ProcessLib/TES/TESProcess.h
+++ b/ProcessLib/TES/TESProcess.h
@@ -56,14 +56,17 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override;
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const& coupling_term
+                                ) override;
 
     void initializeSecondaryVariables();
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override;
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override;
 
     GlobalVector const& computeVapourPartialPressure(
         GlobalVector const& x,
diff --git a/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.cpp b/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.cpp
index 669fa71c3f6d74a4118f6aa66bab614ffc98a174..2cbb77f5b51e0b3f963aa80508374ba02e887c31 100644
--- a/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.cpp
+++ b/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.cpp
@@ -72,19 +72,22 @@ void TwoPhaseFlowWithPPProcess::assembleConcreteProcess(const double t,
                                                         GlobalVector const& x,
                                                         GlobalMatrix& M,
                                                         GlobalMatrix& K,
-                                                        GlobalVector& b)
+                                                        GlobalVector& b,
+                                                        StaggeredCouplingTerm
+                                                        const& coupling_term)
 {
     DBUG("Assemble TwoPhaseFlowWithPPProcess.");
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers,
-        *_local_to_global_index_map, t, x, M, K, b);
+        *_local_to_global_index_map, t, x, M, K, b, coupling_term);
 }
 
 void TwoPhaseFlowWithPPProcess::assembleWithJacobianConcreteProcess(
     const double t, GlobalVector const& x, GlobalVector const& xdot,
     const double dxdot_dx, const double dx_dx, GlobalMatrix& M, GlobalMatrix& K,
-    GlobalVector& b, GlobalMatrix& Jac)
+    GlobalVector& b, GlobalMatrix& Jac,
+    StaggeredCouplingTerm const& coupling_term)
 {
     DBUG("AssembleWithJacobian TwoPhaseFlowWithPPProcess.");
 
@@ -92,7 +95,7 @@ void TwoPhaseFlowWithPPProcess::assembleWithJacobianConcreteProcess(
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
         _local_assemblers, *_local_to_global_index_map, t, x, xdot, dxdot_dx,
-        dx_dx, M, K, b, Jac);
+        dx_dx, M, K, b, Jac, coupling_term);
 }
 
 }  // end of namespace
diff --git a/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.h b/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.h
index ee012075e0eae94d7977e8240dca87239d7e118b..42854353fe426ccf8c388066ffe5a8d900cec95f 100644
--- a/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.h
+++ b/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.h
@@ -52,6 +52,7 @@ public:
             curves);
 
     bool isLinear() const override { return false; }
+
 private:
     void initializeConcreteProcess(
         NumLib::LocalToGlobalIndexMap const& dof_table,
@@ -59,12 +60,15 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override;
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const& coupling_term
+                                ) override;
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override;
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override;
 
     TwoPhaseFlowWithPPProcessData _process_data;
 
diff --git a/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.cpp b/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.cpp
index 2125e18045db29eab3a448e399572febb158a2f6..e57594d1c2035a383ae824e116fd4fd8226ac3d0 100644
--- a/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.cpp
+++ b/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.cpp
@@ -72,19 +72,22 @@ void TwoPhaseFlowWithPrhoProcess::assembleConcreteProcess(const double t,
                                                           GlobalVector const& x,
                                                           GlobalMatrix& M,
                                                           GlobalMatrix& K,
-                                                          GlobalVector& b)
+                                                          GlobalVector& b,
+                                                          StaggeredCouplingTerm
+                                                          const& coupling_term)
 {
     DBUG("Assemble TwoPhaseFlowWithPrhoProcess.");
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assemble, _local_assemblers,
-        *_local_to_global_index_map, t, x, M, K, b);
+        *_local_to_global_index_map, t, x, M, K, b, coupling_term);
 }
 
 void TwoPhaseFlowWithPrhoProcess::assembleWithJacobianConcreteProcess(
     const double t, GlobalVector const& x, GlobalVector const& xdot,
     const double dxdot_dx, const double dx_dx, GlobalMatrix& M, GlobalMatrix& K,
-    GlobalVector& b, GlobalMatrix& Jac)
+    GlobalVector& b, GlobalMatrix& Jac,
+    StaggeredCouplingTerm const& coupling_term)
 {
     DBUG("AssembleWithJacobian TwoPhaseFlowWithPrhoProcess.");
 
@@ -92,7 +95,7 @@ void TwoPhaseFlowWithPrhoProcess::assembleWithJacobianConcreteProcess(
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
         _local_assemblers, *_local_to_global_index_map, t, x, xdot, dxdot_dx,
-        dx_dx, M, K, b, Jac);
+        dx_dx, M, K, b, Jac, coupling_term);
 }
 void TwoPhaseFlowWithPrhoProcess::preTimestepConcreteProcess(
     GlobalVector const& x, double const t, double const dt)
diff --git a/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.h b/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.h
index ccf2163602fa2908caa449cd5e9213e6bc1ee8dc..b148a74985192e8fb92ea2360813b349800f2a7c 100644
--- a/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.h
+++ b/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.h
@@ -56,12 +56,14 @@ private:
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K,
-                                 GlobalVector& b) override;
+                                 GlobalVector& b,
+                                 StaggeredCouplingTerm const& coupling_term) override;
 
     void assembleWithJacobianConcreteProcess(
         const double t, GlobalVector const& x, GlobalVector const& xdot,
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
-        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override;
+        GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac,
+        StaggeredCouplingTerm const& coupling_term) override;
 
     void preTimestepConcreteProcess(GlobalVector const& x, const double t,
                                     const double delta_t) override;
diff --git a/ProcessLib/UncoupledProcessesTimeLoop.cpp b/ProcessLib/UncoupledProcessesTimeLoop.cpp
index 4640820555bcdd328ebe524778c45e6ec9a6bbdd..ab2301a1e8c92eb66619d4a385d56b294ecf46de 100644
--- a/ProcessLib/UncoupledProcessesTimeLoop.cpp
+++ b/ProcessLib/UncoupledProcessesTimeLoop.cpp
@@ -15,6 +15,8 @@
 #include "NumLib/ODESolver/ConvergenceCriterionPerComponent.h"
 #include "NumLib/TimeStepping/Algorithms/FixedTimeStepping.h"
 
+#include "MathLib/LinAlg/LinAlg.h"
+
 std::unique_ptr<NumLib::ITimeStepAlgorithm> createTimeStepper(
     BaseLib::ConfigTree const& config)
 {
@@ -51,7 +53,6 @@ std::unique_ptr<ProcessLib::Output> createOutput(
     return ProcessLib::Output::newInstance(config, output_directory);
 }
 
-
 //! Sets the EquationSystem for the given nonlinear solver,
 //! which is Picard or Newton depending on the NLTag.
 template <NumLib::NonlinearSolverTag NLTag>
@@ -82,12 +83,10 @@ static void setEquationSystem(NumLib::NonlinearSolverBase& nonlinear_solver,
     switch (nl_tag)
     {
         case Tag::Picard:
-            setEquationSystem<Tag::Picard>(nonlinear_solver, eq_sys,
-                                           conv_crit);
+            setEquationSystem<Tag::Picard>(nonlinear_solver, eq_sys, conv_crit);
             break;
         case Tag::Newton:
-            setEquationSystem<Tag::Newton>(nonlinear_solver, eq_sys,
-                                           conv_crit);
+            setEquationSystem<Tag::Newton>(nonlinear_solver, eq_sys, conv_crit);
             break;
     }
 }
@@ -133,6 +132,8 @@ struct SingleProcessData
         std::unique_ptr<NumLib::ConvergenceCriterion>&& conv_crit_,
         std::unique_ptr<NumLib::TimeDiscretization>&& time_disc_,
         Process& process_,
+        std::unordered_map<std::type_index, Process const&>&&
+            coupled_processes_,
         ProcessOutput&& process_output_);
 
     SingleProcessData(SingleProcessData&& spd);
@@ -150,6 +151,8 @@ struct SingleProcessData
     NumLib::InternalMatrixStorage* mat_strg = nullptr;
 
     Process& process;
+    /// Coupled processes.
+    std::unordered_map<std::type_index, Process const&> const coupled_processes;
     ProcessOutput process_output;
 };
 
@@ -159,12 +162,14 @@ SingleProcessData::SingleProcessData(
     std::unique_ptr<NumLib::ConvergenceCriterion>&& conv_crit_,
     std::unique_ptr<NumLib::TimeDiscretization>&& time_disc_,
     Process& process_,
+    std::unordered_map<std::type_index, Process const&>&& coupled_processes_,
     ProcessOutput&& process_output_)
     : nonlinear_solver_tag(NLTag),
       nonlinear_solver(nonlinear_solver),
       conv_crit(std::move(conv_crit_)),
       time_disc(std::move(time_disc_)),
       process(process_),
+      coupled_processes(coupled_processes_),
       process_output(std::move(process_output_))
 {
 }
@@ -177,6 +182,7 @@ SingleProcessData::SingleProcessData(SingleProcessData&& spd)
       tdisc_ode_sys(std::move(spd.tdisc_ode_sys)),
       mat_strg(spd.mat_strg),
       process(spd.process),
+      coupled_processes(spd.coupled_processes),
       process_output(std::move(spd.process_output))
 {
     spd.mat_strg = nullptr;
@@ -240,6 +246,7 @@ std::unique_ptr<SingleProcessData> makeSingleProcessData(
     Process& process,
     std::unique_ptr<NumLib::TimeDiscretization>&& time_disc,
     std::unique_ptr<NumLib::ConvergenceCriterion>&& conv_crit,
+    std::unordered_map<std::type_index, Process const&>&& coupled_processes,
     ProcessOutput&& process_output)
 {
     using Tag = NumLib::NonlinearSolverTag;
@@ -250,7 +257,8 @@ std::unique_ptr<SingleProcessData> makeSingleProcessData(
     {
         return std::unique_ptr<SingleProcessData>{new SingleProcessData{
             *nonlinear_solver_picard, std::move(conv_crit),
-            std::move(time_disc), process, std::move(process_output)}};
+            std::move(time_disc), process, std::move(coupled_processes),
+            std::move(process_output)}};
     }
     else if (auto* nonlinear_solver_newton =
                  dynamic_cast<NumLib::NonlinearSolver<Tag::Newton>*>(
@@ -258,16 +266,18 @@ std::unique_ptr<SingleProcessData> makeSingleProcessData(
     {
         return std::unique_ptr<SingleProcessData>{new SingleProcessData{
             *nonlinear_solver_newton, std::move(conv_crit),
-            std::move(time_disc), process, std::move(process_output)}};
-    } else {
+            std::move(time_disc), process, std::move(coupled_processes),
+            std::move(process_output)}};
+    }
+    else
+    {
         OGS_FATAL("Encountered unknown nonlinear solver type. Aborting");
     }
 }
 
 std::vector<std::unique_ptr<SingleProcessData>> createPerProcessData(
     BaseLib::ConfigTree const& config,
-    const std::map<std::string, std::unique_ptr<Process>>&
-        processes,
+    const std::map<std::string, std::unique_ptr<Process>>& processes,
     std::map<std::string, std::unique_ptr<NumLib::NonlinearSolverBase>> const&
         nonlinear_solvers)
 {
@@ -297,12 +307,38 @@ std::vector<std::unique_ptr<SingleProcessData>> createPerProcessData(
             //! \ogs_file_param{prj__time_loop__processes__process__convergence_criterion}
             pcs_config.getConfigSubtree("convergence_criterion"));
 
+        auto const& coupled_process_tree
+            //! \ogs_file_param{prj__time_loop__processes__process__coupled_processes}
+            = pcs_config.getConfigSubtreeOptional("coupled_processes");
+        std::unordered_map<std::type_index, Process const&> coupled_processes;
+        if (coupled_process_tree)
+        {
+            for (
+                auto const cpl_pcs_name :
+                //! \ogs_file_param{prj__time_loop__processes__process__coupled_processes__coupled_process}
+                coupled_process_tree->getConfigParameterList<std::string>(
+                    "coupled_process"))
+            {
+                auto const& cpl_pcs_ptr = BaseLib::getOrError(
+                    processes, cpl_pcs_name,
+                    "A process with the given name has not been defined.");
+
+                auto const inserted = coupled_processes.emplace(
+                    std::type_index(typeid(*cpl_pcs_ptr)), *cpl_pcs_ptr);
+                if (!inserted.second)
+                {  // insertion failed, i.e., key already exists
+                    OGS_FATAL("Coupled process `%s' already exists.",
+                              cpl_pcs_name.data());
+                }
+            }
+        }
+
         //! \ogs_file_param{prj__time_loop__processes__process__output}
         ProcessOutput process_output{pcs_config.getConfigSubtree("output")};
 
         per_process_data.emplace_back(makeSingleProcessData(
             nl_slv, pcs, std::move(time_disc), std::move(conv_crit),
-            std::move(process_output)));
+            std::move(coupled_processes), std::move(process_output)));
     }
 
     if (per_process_data.size() != processes.size())
@@ -315,11 +351,26 @@ std::vector<std::unique_ptr<SingleProcessData>> createPerProcessData(
 
 std::unique_ptr<UncoupledProcessesTimeLoop> createUncoupledProcessesTimeLoop(
     BaseLib::ConfigTree const& config, std::string const& output_directory,
-    const std::map<std::string, std::unique_ptr<Process>>&
-        processes,
+    const std::map<std::string, std::unique_ptr<Process>>& processes,
     const std::map<std::string, std::unique_ptr<NumLib::NonlinearSolverBase>>&
         nonlinear_solvers)
 {
+    auto const& coupling_config
+        //! \ogs_file_param{prj__time_loop__global_process_coupling}
+        = config.getConfigSubtreeOptional("global_process_coupling");
+
+    std::unique_ptr<NumLib::ConvergenceCriterion> coupling_conv_crit = nullptr;
+    unsigned max_coupling_iterations = 1;
+    if (coupling_config)
+    {
+        max_coupling_iterations
+            //! \ogs_file_param{prj__time_loop__global_process_coupling__max_iter}
+            = coupling_config->getConfigParameter<unsigned>("max_iter");
+        coupling_conv_crit = NumLib::createConvergenceCriterion(
+            //! \ogs_file_param{prj__time_loop__global_process_coupling__convergence_criterion}
+            coupling_config->getConfigSubtree("convergence_criterion"));
+    }
+
     auto timestepper =
         //! \ogs_file_param{prj__time_loop__time_stepping}
         createTimeStepper(config.getConfigSubtree("time_stepping"));
@@ -333,9 +384,10 @@ std::unique_ptr<UncoupledProcessesTimeLoop> createUncoupledProcessesTimeLoop(
         config.getConfigSubtree("processes"), processes, nonlinear_solvers);
 
     return std::unique_ptr<UncoupledProcessesTimeLoop>{
-        new UncoupledProcessesTimeLoop{std::move(timestepper),
-                                       std::move(output),
-                                       std::move(per_process_data)}};
+        new UncoupledProcessesTimeLoop{
+            std::move(timestepper), std::move(output),
+            std::move(per_process_data), max_coupling_iterations,
+            std::move(coupling_conv_crit)}};
 }
 
 std::vector<GlobalVector*> setInitialConditions(
@@ -371,7 +423,8 @@ std::vector<GlobalVector*> setInitialConditions(
             auto& conv_crit = *spd->conv_crit;
 
             setEquationSystem(nonlinear_solver, ode_sys, conv_crit, nl_tag);
-            nonlinear_solver.assemble(x0);
+            nonlinear_solver.assemble(
+                x0, ProcessLib::createVoidStaggeredCouplingTerm());
             time_disc.pushState(
                 t0, x0, mat_strg);  // TODO: that might do duplicate work
         }
@@ -385,6 +438,7 @@ std::vector<GlobalVector*> setInitialConditions(
 bool solveOneTimeStepOneProcess(GlobalVector& x, std::size_t const timestep,
                                 double const t, double const delta_t,
                                 SingleProcessData& process_data,
+                                StaggeredCouplingTerm const& coupling_term,
                                 Output const& output_control)
 {
     auto& process = process_data.process;
@@ -405,35 +459,97 @@ bool solveOneTimeStepOneProcess(GlobalVector& x, std::size_t const timestep,
 
     applyKnownSolutions(ode_sys, nl_tag, x);
 
-    process.preTimestep(x, t, delta_t);
-
-    auto const post_iteration_callback = [&](
-        unsigned iteration, GlobalVector const& x) {
+    auto const post_iteration_callback = [&](unsigned iteration,
+                                             GlobalVector const& x) {
         output_control.doOutputNonlinearIteration(
             process, process_data.process_output, timestep, t, x, iteration);
     };
 
     bool nonlinear_solver_succeeded =
-        nonlinear_solver.solve(x, post_iteration_callback);
+        nonlinear_solver.solve(x, coupling_term, post_iteration_callback);
 
     auto& mat_strg = *process_data.mat_strg;
     time_disc.pushState(t, x, mat_strg);
 
-    process.postTimestep(x);
-
     return nonlinear_solver_succeeded;
 }
 
 UncoupledProcessesTimeLoop::UncoupledProcessesTimeLoop(
     std::unique_ptr<NumLib::ITimeStepAlgorithm>&& timestepper,
     std::unique_ptr<Output>&& output,
-    std::vector<std::unique_ptr<SingleProcessData>>&& per_process_data)
+    std::vector<std::unique_ptr<SingleProcessData>>&& per_process_data,
+    const unsigned global_coupling_max_iterations,
+    std::unique_ptr<NumLib::ConvergenceCriterion>&& global_coupling_conv_crit)
     : _timestepper{std::move(timestepper)},
       _output(std::move(output)),
-      _per_process_data(std::move(per_process_data))
+      _per_process_data(std::move(per_process_data)),
+      _global_coupling_max_iterations(global_coupling_max_iterations),
+      _global_coupling_conv_crit(std::move(global_coupling_conv_crit))
+{
+}
+
+bool UncoupledProcessesTimeLoop::setCoupledSolutions()
 {
+    // Do nothing if process are not coupled
+    if (!_global_coupling_conv_crit && _global_coupling_max_iterations == 1)
+        return false;
+
+    unsigned pcs_idx = 0;
+    _solutions_of_coupled_processes.reserve(_per_process_data.size());
+    for (auto& spd : _per_process_data)
+    {
+        auto const& coupled_processes = spd->coupled_processes;
+        std::unordered_map<std::type_index, GlobalVector const&> coupled_xs;
+        for (auto const& coupled_process_pair : coupled_processes)
+        {
+            ProcessLib::Process const& coupled_process =
+                coupled_process_pair.second;
+            auto const found_item = std::find_if(
+                _per_process_data.begin(),
+                _per_process_data.end(),
+                [&coupled_process](
+                    std::unique_ptr<SingleProcessData> const& item) {
+                    return std::type_index(typeid(coupled_process)) ==
+                           std::type_index(typeid(item->process));
+                });
+
+            if (found_item != _per_process_data.end())
+            {
+                // Id of the coupled process:
+                const std::size_t c_id =
+                    std::distance(_per_process_data.begin(), found_item);
+
+                BaseLib::insertIfTypeIndexKeyUniqueElseError(
+                    coupled_xs, coupled_process_pair.first,
+                    *_process_solutions[c_id], "global_coupled_x");
+            }
+        }
+        _solutions_of_coupled_processes.emplace_back(coupled_xs);
+
+        auto const x = *_process_solutions[pcs_idx];
+
+        // Create a vector to store the solution of the last coupling iteration
+        auto& x_coupling0 = NumLib::GlobalVectorProvider::provider.getVector(x);
+        MathLib::LinAlg::copy(x, x_coupling0);
+
+        // append a solution vector of suitable size
+        _solutions_of_last_cpl_iteration.emplace_back(&x_coupling0);
+
+        ++pcs_idx;
+    }  // end of for (auto& spd : _per_process_data)
+
+    return true;
 }
 
+/*
+ * TODO:
+ * Now we have a structure inside the time loop which is very similar to the
+ * nonlinear solver. And admittedly, the control flow inside the nonlinear
+ * solver is rather complicated. Maybe in the future con can introduce an
+ * abstraction that can do both the convergence checks of the coupling loop and
+ * of the nonlinear solver.
+ *
+ */
 bool UncoupledProcessesTimeLoop::loop()
 {
     // initialize output, convergence criterion, etc.
@@ -448,7 +564,8 @@ bool UncoupledProcessesTimeLoop::loop()
 
             if (auto* conv_crit =
                     dynamic_cast<NumLib::ConvergenceCriterionPerComponent*>(
-                        spd->conv_crit.get())) {
+                        spd->conv_crit.get()))
+            {
                 conv_crit->setDOFTable(pcs.getDOFTable(), pcs.getMesh());
             }
 
@@ -475,6 +592,8 @@ bool UncoupledProcessesTimeLoop::loop()
         }
     }
 
+    const bool is_staggered_coupling = setCoupledSolutions();
+
     double t = t0;
     std::size_t timestep = 1;  // the first timestep really is number one
     bool nonlinear_solver_succeeded = true;
@@ -492,7 +611,96 @@ bool UncoupledProcessesTimeLoop::loop()
         INFO("=== timestep #%u (t=%gs, dt=%gs) ==============================",
              timestep, t, delta_t);
 
+        if (is_staggered_coupling)
+            nonlinear_solver_succeeded =
+                solveCoupledEquationSystemsByStaggeredScheme(t, delta_t,
+                                                             timestep);
+        else
+            nonlinear_solver_succeeded =
+                solveUncoupledEquationSystems(t, delta_t, timestep);
+
+        INFO("[time] Time step #%u took %g s.", timestep,
+             time_timestep.elapsed());
+
+        if (!nonlinear_solver_succeeded)
+            break;
+    }
+
+    // output last time step
+    if (nonlinear_solver_succeeded)
+    {
+        unsigned pcs_idx = 0;
+        for (auto& spd : _per_process_data)
+        {
+            auto& pcs = spd->process;
+            auto const& x = *_process_solutions[pcs_idx];
+            _output->doOutputLastTimestep(pcs, spd->process_output, timestep, t,
+                                          x);
+
+            ++pcs_idx;
+        }
+    }
+
+    return nonlinear_solver_succeeded;
+}
+
+bool UncoupledProcessesTimeLoop::solveUncoupledEquationSystems(
+    const double t, const double dt, const std::size_t timestep_id)
+{
+    // TODO use process name
+    unsigned pcs_idx = 0;
+    for (auto& spd : _per_process_data)
+    {
+        auto& pcs = spd->process;
+        BaseLib::RunTime time_timestep_process;
+        time_timestep_process.start();
+
+        auto& x = *_process_solutions[pcs_idx];
+        pcs.preTimestep(x, t, dt);
+
+        const auto nonlinear_solver_succeeded = solveOneTimeStepOneProcess(
+            x, timestep_id, t, dt, *spd,
+            ProcessLib::createVoidStaggeredCouplingTerm(), *_output);
+        pcs.postTimestep(x);
+        pcs.computeSecondaryVariable(t, x);
+
+        INFO("[time] Solving process #%u took %g s in time step #%u ", pcs_idx,
+             time_timestep_process.elapsed(), timestep_id);
+
+        if (!nonlinear_solver_succeeded)
+        {
+            ERR("The nonlinear solver failed in time step #%u at t = %g "
+                "s for process #%u.",
+                timestep_id, t, pcs_idx);
+
+            // save unsuccessful solution
+            _output->doOutputAlways(pcs, spd->process_output, timestep_id, t,
+                                    x);
+
+            return false;
+        }
+        else
+        {
+            _output->doOutput(pcs, spd->process_output, timestep_id, t, x);
+        }
+
+        ++pcs_idx;
+    }  // end of for (auto& spd : _per_process_data)
+
+    return true;
+}
+
+bool UncoupledProcessesTimeLoop::solveCoupledEquationSystemsByStaggeredScheme(
+    const double t, const double dt, const std::size_t timestep_id)
+{
+    // Coupling iteration
+    bool coupling_iteration_converged = true;
+    for (unsigned global_coupling_iteration = 0;
+         global_coupling_iteration < _global_coupling_max_iterations;
+         global_coupling_iteration++)
+    {
         // TODO use process name
+        bool nonlinear_solver_succeeded = true;
         unsigned pcs_idx = 0;
         for (auto& spd : _per_process_data)
         {
@@ -501,62 +709,103 @@ bool UncoupledProcessesTimeLoop::loop()
             time_timestep_process.start();
 
             auto& x = *_process_solutions[pcs_idx];
+            if (global_coupling_iteration == 0)
+            {
+                // Copy the solution of the previous time step to a vector that
+                // belongs to process. For some problems, both of the current
+                // solution and the solution of the previous time step are
+                // required for the coupling computation.
+                pcs.preTimestep(x, t, dt);
+
+                // Set the flag of the first iteration be true.
+                _global_coupling_conv_crit->preFirstIteration();
+            }
+            else
+            {
+                // Set the flag of the first iteration be false.
+                _global_coupling_conv_crit->setNoFirstIteration();
+            }
+            StaggeredCouplingTerm coupling_term(
+                spd->coupled_processes,
+                _solutions_of_coupled_processes[pcs_idx], dt);
 
-            nonlinear_solver_succeeded = solveOneTimeStepOneProcess(
-                x, timestep, t, delta_t, *spd, *_output);
-
-            pcs.computeSecondaryVariable(t, x);
+            const auto nonlinear_solver_succeeded = solveOneTimeStepOneProcess(
+                x, timestep_id, t, dt, *spd, coupling_term, *_output);
 
-            INFO("[time] Solving process #%u took %g s in timestep #%u.",
-                 pcs_idx, time_timestep.elapsed(), timestep);
+            INFO(
+                "[time] Solving process #%u took %g s in time step #%u "
+                " coupling iteration #%u",
+                pcs_idx, time_timestep_process.elapsed(), timestep_id,
+                global_coupling_iteration);
 
             if (!nonlinear_solver_succeeded)
             {
-                ERR("The nonlinear solver failed in timestep #%u at t = %g s"
+                ERR("The nonlinear solver failed in time step #%u at t = %g "
+                    "s"
                     " for process #%u.",
-                    timestep, t, pcs_idx);
+                    timestep_id, t, pcs_idx);
 
                 // save unsuccessful solution
-                _output->doOutputAlways(pcs, spd->process_output, timestep, t, x);
+                _output->doOutputAlways(pcs, spd->process_output, timestep_id,
+                                        t, x);
 
                 break;
             }
-            else
-            {
-                _output->doOutput(pcs, spd->process_output, timestep, t, x);
-            }
+
+            // Check the convergence of the coupling iteration
+            auto& x_old = *_solutions_of_last_cpl_iteration[pcs_idx];
+            MathLib::LinAlg::axpy(x_old, -1.0, x);
+            _global_coupling_conv_crit->checkResidual(x_old);
+            coupling_iteration_converged =
+                coupling_iteration_converged &&
+                _global_coupling_conv_crit->isSatisfied();
+
+            if (coupling_iteration_converged)
+                break;
+            MathLib::LinAlg::copy(x, x_old);
 
             ++pcs_idx;
-        }
+        }  // end of for (auto& spd : _per_process_data)
 
-        INFO("[time] Timestep #%u took %g s.", timestep,
-             time_timestep.elapsed());
+        if (coupling_iteration_converged)
+            break;
 
         if (!nonlinear_solver_succeeded)
-            break;
+        {
+            return false;
+        }
     }
 
-    // output last timestep
-    if (nonlinear_solver_succeeded)
+    if (!coupling_iteration_converged)
     {
-        unsigned pcs_idx = 0;
-        for (auto& spd : _per_process_data)
-        {
-            auto& pcs = spd->process;
-            auto const& x = *_process_solutions[pcs_idx];
-            _output->doOutputLastTimestep(pcs, spd->process_output, timestep, t, x);
+        WARN(
+            "The coupling iterations reaches its maximum number in time step"
+            "#%u at t = %g s",
+            timestep_id, t);
+    }
 
-            ++pcs_idx;
-        }
+    unsigned pcs_idx = 0;
+    for (auto& spd : _per_process_data)
+    {
+        auto& pcs = spd->process;
+        auto& x = *_process_solutions[pcs_idx];
+        pcs.postTimestep(x);
+        pcs.computeSecondaryVariable(t, x);
+
+        _output->doOutput(pcs, spd->process_output, timestep_id, t, x);
+        ++pcs_idx;
     }
 
-    return nonlinear_solver_succeeded;
+    return true;
 }
 
 UncoupledProcessesTimeLoop::~UncoupledProcessesTimeLoop()
 {
     for (auto* x : _process_solutions)
         NumLib::GlobalVectorProvider::provider.releaseVector(*x);
+
+    for (auto* x : _solutions_of_last_cpl_iteration)
+        NumLib::GlobalVectorProvider::provider.releaseVector(*x);
 }
 
 }  // namespace ProcessLib
diff --git a/ProcessLib/UncoupledProcessesTimeLoop.h b/ProcessLib/UncoupledProcessesTimeLoop.h
index 3d6f74dde46d282100cb5f7436e42820350b6cc0..497e1276a9238786284f560d9a33c42aab076800 100644
--- a/ProcessLib/UncoupledProcessesTimeLoop.h
+++ b/ProcessLib/UncoupledProcessesTimeLoop.h
@@ -10,6 +10,10 @@
 #pragma once
 
 #include <memory>
+#include <unordered_map>
+#include <typeinfo>
+#include <typeindex>
+
 #include <logog/include/logog.hpp>
 
 #include "NumLib/ODESolver/NonlinearSolver.h"
@@ -18,35 +22,100 @@
 #include "Output.h"
 #include "Process.h"
 
+namespace NumLib
+{
+class ConvergenceCriterion;
+}
+
 namespace ProcessLib
 {
 struct SingleProcessData;
 
-//! Time loop capable of time-integrating several uncoupled processes at once.
+/// Time loop capable of time-integrating several processes at once.
+/// TODO: Rename to, e.g., TimeLoop, since it is not for purely uncoupled stuff
+/// anymore.
 class UncoupledProcessesTimeLoop
 {
 public:
     explicit UncoupledProcessesTimeLoop(
         std::unique_ptr<NumLib::ITimeStepAlgorithm>&& timestepper,
         std::unique_ptr<Output>&& output,
-        std::vector<std::unique_ptr<SingleProcessData>>&& per_process_data);
+        std::vector<std::unique_ptr<SingleProcessData>>&& per_process_data,
+        const unsigned global_coupling_max_iterations,
+        std::unique_ptr<NumLib::ConvergenceCriterion>&&
+            global_coupling_conv_crit);
 
     bool loop();
 
     ~UncoupledProcessesTimeLoop();
 
+    /**
+     *  This function fills the vector of solutions of coupled processes of
+     *  processes, _solutions_of_coupled_processes, and initializes the vector
+     * of
+     *  solutions of the previous coupling iteration,
+     *  _solutions_of_last_cpl_iteration.
+     *
+     *  \return a boolean value as a flag to indicate there should be a coupling
+     *          among processes or not.
+     */
+    bool setCoupledSolutions();
+
 private:
     std::vector<GlobalVector*> _process_solutions;
     std::unique_ptr<NumLib::ITimeStepAlgorithm> _timestepper;
     std::unique_ptr<Output> _output;
     std::vector<std::unique_ptr<SingleProcessData>> _per_process_data;
+
+    /// Maximum iterations of the global coupling.
+    const unsigned _global_coupling_max_iterations;
+    /// Convergence criteria of the global coupling iterations.
+    std::unique_ptr<NumLib::ConvergenceCriterion> _global_coupling_conv_crit;
+
+    /**
+     *  Vector of solutions of coupled processes of processes.
+     *  Each vector element stores the references of the solution vectors
+     *  (stored in _process_solutions) of the coupled processes of a process.
+     */
+    std::vector<std::unordered_map<std::type_index, GlobalVector const&>>
+        _solutions_of_coupled_processes;
+
+    /// Solutions of the previous coupling iteration for the convergence
+    /// criteria of the coupling iteration.
+    std::vector<GlobalVector*> _solutions_of_last_cpl_iteration;
+
+    /**
+     * \brief Member to solver non coupled systems of equations, which can be
+     *        a single system of equations, or several systems of equations
+     *        without any dependency among the different systems.
+     *
+     * @param t           Current time
+     * @param dt          Time step size
+     * @param timestep_id Index of the time step
+     * @return            true:  if all nonlinear solvers convergence.
+     *                    false: if any of nonlinear solvers divergences.
+     */
+    bool solveUncoupledEquationSystems(const double t, const double dt,
+                                       const std::size_t timestep_id);
+
+    /**
+     * \brief Member to solver coupled systems of equations by the staggered
+     *        scheme.
+     *
+     * @param t           Current time
+     * @param dt          Time step size
+     * @param timestep_id Index of the time step
+     * @return            true:   if all nonlinear solvers convergence.
+     *                    false:  if any of nonlinear solvers divergences.
+     */
+    bool solveCoupledEquationSystemsByStaggeredScheme(
+        const double t, const double dt, const std::size_t timestep_id);
 };
 
 //! Builds an UncoupledProcessesTimeLoop from the given configuration.
 std::unique_ptr<UncoupledProcessesTimeLoop> createUncoupledProcessesTimeLoop(
     BaseLib::ConfigTree const& config, std::string const& output_directory,
-    std::map<std::string, std::unique_ptr<Process>> const&
-        processes,
+    std::map<std::string, std::unique_ptr<Process>> const& processes,
     std::map<std::string, std::unique_ptr<NumLib::NonlinearSolverBase>> const&
         nonlinear_solvers);
 
diff --git a/ProcessLib/VectorMatrixAssembler.cpp b/ProcessLib/VectorMatrixAssembler.cpp
index dda159cf6ab5061d69c2537c4db04d0b1899ef38..a82bd7893396397cfea906f41a591b299fb88776 100644
--- a/ProcessLib/VectorMatrixAssembler.cpp
+++ b/ProcessLib/VectorMatrixAssembler.cpp
@@ -15,8 +15,61 @@
 #include "MathLib/LinAlg/Eigen/EigenMapTools.h"
 #include "LocalAssemblerInterface.h"
 
+#include "Process.h"
+
 namespace ProcessLib
 {
+static std::unordered_map<std::type_index, const std::vector<double>>
+getPreviousLocalSolutionsOfCoupledProcesses(
+    const StaggeredCouplingTerm& coupling_term,
+    const std::vector<GlobalIndexType>& indices)
+{
+    std::unordered_map<std::type_index, const std::vector<double>>
+        local_coupled_xs0;
+
+    for (auto const& coupled_process_pair : coupling_term.coupled_processes)
+    {
+        auto const& coupled_pcs = coupled_process_pair.second;
+        auto const prevous_time_x = coupled_pcs.getPreviousTimeStepSolution();
+        if (prevous_time_x)
+        {
+            auto const local_coupled_x0 = prevous_time_x->get(indices);
+            BaseLib::insertIfTypeIndexKeyUniqueElseError(
+                local_coupled_xs0, coupled_process_pair.first, local_coupled_x0,
+                "local_coupled_x0");
+        }
+        else
+        {
+            const std::vector<double> local_coupled_x0;
+            BaseLib::insertIfTypeIndexKeyUniqueElseError(
+                local_coupled_xs0, coupled_process_pair.first, local_coupled_x0,
+                "local_coupled_x0");
+        }
+    }
+    return local_coupled_xs0;
+}
+
+static std::unordered_map<std::type_index, const std::vector<double>>
+getCurrentLocalSolutionsOfCoupledProcesses(
+    const std::unordered_map<std::type_index, GlobalVector const&>&
+        global_coupled_xs,
+    const std::vector<GlobalIndexType>& indices)
+{
+    std::unordered_map<std::type_index, const std::vector<double>>
+        local_coupled_xs;
+
+    // Get local nodal solutions of the coupled equations.
+    for (auto const& global_coupled_x_pair : global_coupled_xs)
+    {
+        auto const& coupled_x = global_coupled_x_pair.second;
+        auto const local_coupled_x = coupled_x.get(indices);
+        BaseLib::insertIfTypeIndexKeyUniqueElseError(
+            local_coupled_xs, global_coupled_x_pair.first, local_coupled_x,
+            "local_coupled_x");
+    }
+    return local_coupled_xs;
+}
+
 VectorMatrixAssembler::VectorMatrixAssembler(
     std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler)
     : _jacobian_assembler(std::move(jacobian_assembler))
@@ -26,7 +79,8 @@ VectorMatrixAssembler::VectorMatrixAssembler(
 void VectorMatrixAssembler::assemble(
     const std::size_t mesh_item_id, LocalAssemblerInterface& local_assembler,
     const NumLib::LocalToGlobalIndexMap& dof_table, const double t,
-    const GlobalVector& x, GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b)
+    const GlobalVector& x, GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b,
+    const StaggeredCouplingTerm& coupling_term)
 {
     auto const indices = NumLib::getIndices(mesh_item_id, dof_table);
     auto const local_x = x.get(indices);
@@ -35,21 +89,42 @@ void VectorMatrixAssembler::assemble(
     _local_K_data.clear();
     _local_b_data.clear();
 
-    local_assembler.assemble(t, local_x, _local_M_data, _local_K_data, _local_b_data);
+    if (coupling_term.empty)
+    {
+        local_assembler.assemble(t, local_x, _local_M_data, _local_K_data,
+                                 _local_b_data);
+    }
+    else
+    {
+        auto local_coupled_xs0 =
+            getPreviousLocalSolutionsOfCoupledProcesses(coupling_term, indices);
+        auto local_coupled_xs = getCurrentLocalSolutionsOfCoupledProcesses(
+            coupling_term.coupled_xs, indices);
+        ProcessLib::LocalCouplingTerm local_coupling_term(
+            coupling_term.dt, coupling_term.coupled_processes,
+            std::move(local_coupled_xs0), std::move(local_coupled_xs));
+
+        local_assembler.coupling_assemble(t, local_x, _local_M_data,
+                                          _local_K_data, _local_b_data,
+                                          local_coupling_term);
+    }
 
     auto const num_r_c = indices.size();
     auto const r_c_indices =
         NumLib::LocalToGlobalIndexMap::RowColumnIndices(indices, indices);
 
-    if (!_local_M_data.empty()) {
+    if (!_local_M_data.empty())
+    {
         auto const local_M = MathLib::toMatrix(_local_M_data, num_r_c, num_r_c);
         M.add(r_c_indices, local_M);
     }
-    if (!_local_K_data.empty()) {
+    if (!_local_K_data.empty())
+    {
         auto const local_K = MathLib::toMatrix(_local_K_data, num_r_c, num_r_c);
         K.add(r_c_indices, local_K);
     }
-    if (!_local_b_data.empty()) {
+    if (!_local_b_data.empty())
+    {
         assert(_local_b_data.size() == num_r_c);
         b.add(indices, _local_b_data);
     }
@@ -60,7 +135,7 @@ void VectorMatrixAssembler::assembleWithJacobian(
     NumLib::LocalToGlobalIndexMap const& dof_table, const double t,
     GlobalVector const& x, GlobalVector const& xdot, const double dxdot_dx,
     const double dx_dx, GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b,
-    GlobalMatrix& Jac)
+    GlobalMatrix& Jac, const StaggeredCouplingTerm& coupling_term)
 {
     auto const indices = NumLib::getIndices(mesh_item_id, dof_table);
     auto const local_x = x.get(indices);
@@ -71,31 +146,55 @@ void VectorMatrixAssembler::assembleWithJacobian(
     _local_b_data.clear();
     _local_Jac_data.clear();
 
-    _jacobian_assembler->assembleWithJacobian(
-        local_assembler, t, local_x, local_xdot, dxdot_dx, dx_dx, _local_M_data,
-        _local_K_data, _local_b_data, _local_Jac_data);
+    if (coupling_term.empty)
+    {
+        _jacobian_assembler->assembleWithJacobian(
+            local_assembler, t, local_x, local_xdot, dxdot_dx, dx_dx,
+            _local_M_data, _local_K_data, _local_b_data, _local_Jac_data);
+    }
+    else
+    {
+        auto local_coupled_xs0 =
+            getPreviousLocalSolutionsOfCoupledProcesses(coupling_term, indices);
+        auto local_coupled_xs = getCurrentLocalSolutionsOfCoupledProcesses(
+            coupling_term.coupled_xs, indices);
+        ProcessLib::LocalCouplingTerm local_coupling_term(
+            coupling_term.dt, coupling_term.coupled_processes,
+            std::move(local_coupled_xs0), std::move(local_coupled_xs));
+
+        _jacobian_assembler->assembleWithJacobianAndCouping(
+            local_assembler, t, local_x, local_xdot, dxdot_dx, dx_dx,
+            _local_M_data, _local_K_data, _local_b_data, _local_Jac_data,
+            local_coupling_term);
+    }
 
     auto const num_r_c = indices.size();
     auto const r_c_indices =
         NumLib::LocalToGlobalIndexMap::RowColumnIndices(indices, indices);
 
-    if (!_local_M_data.empty()) {
+    if (!_local_M_data.empty())
+    {
         auto const local_M = MathLib::toMatrix(_local_M_data, num_r_c, num_r_c);
         M.add(r_c_indices, local_M);
     }
-    if (!_local_K_data.empty()) {
+    if (!_local_K_data.empty())
+    {
         auto const local_K = MathLib::toMatrix(_local_K_data, num_r_c, num_r_c);
         K.add(r_c_indices, local_K);
     }
-    if (!_local_b_data.empty()) {
+    if (!_local_b_data.empty())
+    {
         assert(_local_b_data.size() == num_r_c);
         b.add(indices, _local_b_data);
     }
-    if (!_local_Jac_data.empty()) {
+    if (!_local_Jac_data.empty())
+    {
         auto const local_Jac =
             MathLib::toMatrix(_local_Jac_data, num_r_c, num_r_c);
         Jac.add(r_c_indices, local_Jac);
-    } else {
+    }
+    else
+    {
         OGS_FATAL(
             "No Jacobian has been assembled! This might be due to programming "
             "errors in the local assembler of the current process.");
diff --git a/ProcessLib/VectorMatrixAssembler.h b/ProcessLib/VectorMatrixAssembler.h
index a017a00ab0050b4420d898588e0bf3a2106ee72c..737805a75efab29cdfdf324bfb89dfb7661a4415 100644
--- a/ProcessLib/VectorMatrixAssembler.h
+++ b/ProcessLib/VectorMatrixAssembler.h
@@ -12,6 +12,7 @@
 #include <vector>
 #include "NumLib/NumericsConfig.h"
 #include "AbstractJacobianAssembler.h"
+#include "StaggeredCouplingTerm.h"
 
 namespace NumLib
 {
@@ -38,7 +39,8 @@ public:
                   LocalAssemblerInterface& local_assembler,
                   NumLib::LocalToGlobalIndexMap const& dof_table,
                   double const t, GlobalVector const& x, GlobalMatrix& M,
-                  GlobalMatrix& K, GlobalVector& b);
+                  GlobalMatrix& K, GlobalVector& b,
+                  const StaggeredCouplingTerm& coupling_term);
 
     //! Assembles \c M, \c K, \c b, and the Jacobian \c Jac of the residual.
     //! \note The Jacobian must be assembled.
@@ -49,7 +51,8 @@ public:
                               GlobalVector const& xdot, const double dxdot_dx,
                               const double dx_dx, GlobalMatrix& M,
                               GlobalMatrix& K, GlobalVector& b,
-                              GlobalMatrix& Jac);
+                              GlobalMatrix& Jac,
+                              const StaggeredCouplingTerm& coupling_term);
 
 private:
     // temporary data only stored here in order to avoid frequent memory
@@ -63,4 +66,4 @@ private:
     std::unique_ptr<AbstractJacobianAssembler> _jacobian_assembler;
 };
 
-}   // namespace ProcessLib
+}  // namespace ProcessLib
diff --git a/Tests/NumLib/ODEs.h b/Tests/NumLib/ODEs.h
index 848b62cda93f2e290b1649be0a858f7402b0d795..0350116e05203811c228d3dba7cec79450ba5c6e 100644
--- a/Tests/NumLib/ODEs.h
+++ b/Tests/NumLib/ODEs.h
@@ -13,6 +13,7 @@
 #include "MathLib/LinAlg/LinAlg.h"
 #include "MathLib/LinAlg/UnifiedMatrixSetters.h"
 #include "NumLib/ODESolver/ODESystem.h"
+#include "ProcessLib/StaggeredCouplingTerm.h"
 
 // debug
 //#include <iostream>
@@ -27,7 +28,9 @@ class ODE1 final : public NumLib::ODESystem<
 {
 public:
     void assemble(const double /*t*/, GlobalVector const& /*x*/,
-                  GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b) override
+                  GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b,
+                  ProcessLib::StaggeredCouplingTerm const& /*coupling_term*/
+                 ) override
     {
         MathLib::setMatrix(M, { 1.0, 0.0,  0.0, 1.0 });
         MathLib::setMatrix(K, { 0.0, 1.0, -1.0, 0.0 });
@@ -39,11 +42,13 @@ public:
                               GlobalVector const& /*xdot*/, const double dxdot_dx,
                               const double dx_dx, GlobalMatrix& M,
                               GlobalMatrix& K, GlobalVector& b,
-                              GlobalMatrix& Jac) override
+                              GlobalMatrix& Jac,
+                              ProcessLib::StaggeredCouplingTerm
+                              const& coupling_term) override
     {
         namespace LinAlg = MathLib::LinAlg;
 
-        assemble(t, x_curr, M, K, b);
+        assemble(t, x_curr, M, K, b, coupling_term);
 
         // compute Jac = M*dxdot_dx + dx_dx*K
         LinAlg::finalizeAssembly(M);
@@ -103,7 +108,9 @@ class ODE2 final : public NumLib::ODESystem<
 {
 public:
     void assemble(const double /*t*/, GlobalVector const& x, GlobalMatrix& M,
-                  GlobalMatrix& K, GlobalVector& b) override
+                  GlobalMatrix& K, GlobalVector& b,
+                  ProcessLib::StaggeredCouplingTerm const& /*coupling_term*/
+                 ) override
     {
         MathLib::setMatrix(M, {1.0});
         MathLib::setMatrix(K, {x[0]});
@@ -114,9 +121,11 @@ public:
                               GlobalVector const& /*xdot*/,
                               const double dxdot_dx, const double dx_dx,
                               GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b,
-                              GlobalMatrix& Jac) override
+                              GlobalMatrix& Jac,
+                              ProcessLib::StaggeredCouplingTerm const&
+                              coupling_term) override
     {
-        assemble(t, x, M, K, b);
+        assemble(t, x, M, K, b, coupling_term);
 
         namespace LinAlg = MathLib::LinAlg;
 
@@ -187,7 +196,9 @@ class ODE3 final : public NumLib::ODESystem<
 {
 public:
     void assemble(const double /*t*/, GlobalVector const& x_curr, GlobalMatrix& M,
-                  GlobalMatrix& K, GlobalVector& b) override
+                  GlobalMatrix& K, GlobalVector& b,
+                  ProcessLib::StaggeredCouplingTerm const& /*coupling_term*/
+                 ) override
     {
         auto const u = x_curr[0];
         auto const v = x_curr[1];
@@ -202,9 +213,11 @@ public:
                               GlobalVector const& xdot, const double dxdot_dx,
                               const double dx_dx, GlobalMatrix& M,
                               GlobalMatrix& K, GlobalVector& b,
-                              GlobalMatrix& Jac) override
+                              GlobalMatrix& Jac,
+                              ProcessLib::StaggeredCouplingTerm const&
+                              coupling_term) override
     {
-        assemble(t, x_curr, M, K, b);
+        assemble(t, x_curr, M, K, b, coupling_term);
 
         auto const u = x_curr[0];
         auto const v = x_curr[1];
diff --git a/Tests/NumLib/TimeLoopSingleODE.h b/Tests/NumLib/TimeLoopSingleODE.h
index 1c67bfe6edc04f2d0b2f957f550d18dac04828b8..531ef5432da06a84d30d808f993998ead616d8e2 100644
--- a/Tests/NumLib/TimeLoopSingleODE.h
+++ b/Tests/NumLib/TimeLoopSingleODE.h
@@ -13,6 +13,7 @@
 #include "NumLib/ODESolver/TimeDiscretizedODESystem.h"
 #include "NumLib/ODESolver/NonlinearSolver.h"
 
+#include "ProcessLib/StaggeredCouplingTerm.h"
 
 namespace NumLib
 {
@@ -96,7 +97,8 @@ bool TimeLoopSingleODE<NLTag>::loop(const double t0, GlobalVector const& x0,
 
     if (time_disc.needsPreload())
     {
-        _nonlinear_solver->assemble(x);
+        _nonlinear_solver->assemble(x,
+                                 ProcessLib::createVoidStaggeredCouplingTerm());
         time_disc.pushState(t0, x0,
                             _ode_sys);  // TODO: that might do duplicate work
     }
@@ -112,7 +114,9 @@ bool TimeLoopSingleODE<NLTag>::loop(const double t0, GlobalVector const& x0,
         // INFO("time: %e, delta_t: %e", t, delta_t);
         time_disc.nextTimestep(t, delta_t);
 
-        nl_slv_succeeded = _nonlinear_solver->solve(x, nullptr);
+        nl_slv_succeeded = _nonlinear_solver->solve(x,
+                                ProcessLib::createVoidStaggeredCouplingTerm(),
+                                nullptr);
         if (!nl_slv_succeeded)
             break;