diff --git a/Documentation/ProjectFile/prj/processes/process/HT/solid_thermal_expansion/i_solid_thermal_expansion.md b/Documentation/ProjectFile/prj/processes/process/HT/solid_thermal_expansion/i_solid_thermal_expansion.md
new file mode 100644
index 0000000000000000000000000000000000000000..70cabb8294b4f5438a5b29bafc869e152149f93e
--- /dev/null
+++ b/Documentation/ProjectFile/prj/processes/process/HT/solid_thermal_expansion/i_solid_thermal_expansion.md
@@ -0,0 +1,2 @@
+Parameters of solid thermal expansion in flow process.
+
diff --git a/Documentation/ProjectFile/prj/processes/process/LIQUID_FLOW/material_property/solid/t_biot_constant.md b/Documentation/ProjectFile/prj/processes/process/HT/solid_thermal_expansion/t_biot_constant.md
similarity index 100%
rename from Documentation/ProjectFile/prj/processes/process/LIQUID_FLOW/material_property/solid/t_biot_constant.md
rename to Documentation/ProjectFile/prj/processes/process/HT/solid_thermal_expansion/t_biot_constant.md
diff --git a/Documentation/ProjectFile/prj/processes/process/HT/solid_thermal_expansion/t_thermal_expansion.md b/Documentation/ProjectFile/prj/processes/process/HT/solid_thermal_expansion/t_thermal_expansion.md
new file mode 100644
index 0000000000000000000000000000000000000000..8dbed74f077d47fdfa68f8fcfcb4c2e96442741c
--- /dev/null
+++ b/Documentation/ProjectFile/prj/processes/process/HT/solid_thermal_expansion/t_thermal_expansion.md
@@ -0,0 +1 @@
+Line thermal expansion of solid.
diff --git a/Documentation/ProjectFile/prj/processes/process/HT/t_coupling_scheme.md b/Documentation/ProjectFile/prj/processes/process/HT/t_coupling_scheme.md
new file mode 100644
index 0000000000000000000000000000000000000000..b58ddae1e2276cc1d0042c9afdc34d8d60a8a5d3
--- /dev/null
+++ b/Documentation/ProjectFile/prj/processes/process/HT/t_coupling_scheme.md
@@ -0,0 +1,2 @@
+An optional input to select the coupling scheme. The variable is 'staggered'.
+If this input is omitted, the coupling scheme is the monolithic scheme by default.
diff --git a/Documentation/ProjectFile/prj/processes/process/HT/t_fluid_reference_density.md b/Documentation/ProjectFile/prj/processes/process/HT/t_fluid_reference_density.md
deleted file mode 100644
index c6a5d6d663dfbe57eec3c3c4f63a283ae216f6b2..0000000000000000000000000000000000000000
--- a/Documentation/ProjectFile/prj/processes/process/HT/t_fluid_reference_density.md
+++ /dev/null
@@ -1,3 +0,0 @@
-Reference value of the fluid density used in the equation of state for the fluid
-density as well as in the calculation of the thermal dispersivity and head
-capacity.
diff --git a/Documentation/ProjectFile/prj/processes/process/HT/thermal_dispersivity/i_thermal_dispersivity.md b/Documentation/ProjectFile/prj/processes/process/HT/thermal_dispersivity/i_thermal_dispersivity.md
new file mode 100644
index 0000000000000000000000000000000000000000..570535f773e77fc750991eb140352285f56f3836
--- /dev/null
+++ b/Documentation/ProjectFile/prj/processes/process/HT/thermal_dispersivity/i_thermal_dispersivity.md
@@ -0,0 +1 @@
+Optional input for thermal dispersion properties.
diff --git a/Documentation/ProjectFile/prj/processes/process/HT/t_thermal_dispersivity_longitudinal.md b/Documentation/ProjectFile/prj/processes/process/HT/thermal_dispersivity/t_longitudinal.md
similarity index 100%
rename from Documentation/ProjectFile/prj/processes/process/HT/t_thermal_dispersivity_longitudinal.md
rename to Documentation/ProjectFile/prj/processes/process/HT/thermal_dispersivity/t_longitudinal.md
diff --git a/Documentation/ProjectFile/prj/processes/process/HT/t_thermal_dispersivity_transversal.md b/Documentation/ProjectFile/prj/processes/process/HT/thermal_dispersivity/t_transversal.md
similarity index 100%
rename from Documentation/ProjectFile/prj/processes/process/HT/t_thermal_dispersivity_transversal.md
rename to Documentation/ProjectFile/prj/processes/process/HT/thermal_dispersivity/t_transversal.md
diff --git a/Documentation/ProjectFile/prj/processes/process/HYDRO_MECHANICS/t_coupling_scheme.md b/Documentation/ProjectFile/prj/processes/process/HYDRO_MECHANICS/t_coupling_scheme.md
new file mode 100644
index 0000000000000000000000000000000000000000..c4be227511fbd428f9f3bbbf14e19d9ebcf011b0
--- /dev/null
+++ b/Documentation/ProjectFile/prj/processes/process/HYDRO_MECHANICS/t_coupling_scheme.md
@@ -0,0 +1,2 @@
+An optional input to select the coupling scheme. So far, only the monolithic
+scheme is available for HYDRO_MECHANICS, and this input can be omitted.
diff --git a/Documentation/ProjectFile/prj/processes/process/HYDRO_MECHANICS_WITH_LIE/t_coupling_scheme.md b/Documentation/ProjectFile/prj/processes/process/HYDRO_MECHANICS_WITH_LIE/t_coupling_scheme.md
new file mode 100644
index 0000000000000000000000000000000000000000..3beece0c76a2f0afc8c56095847c7c250dca7b88
--- /dev/null
+++ b/Documentation/ProjectFile/prj/processes/process/HYDRO_MECHANICS_WITH_LIE/t_coupling_scheme.md
@@ -0,0 +1,2 @@
+An optional input to select the coupling scheme. So far, only the monolithic
+scheme is available for HYDRO_MECHANICS_WITH_LIE, and this input can be omitted.
diff --git a/Documentation/ProjectFile/prj/processes/process/LIQUID_FLOW/material_property/solid/i_solid.md b/Documentation/ProjectFile/prj/processes/process/LIQUID_FLOW/material_property/solid/i_solid.md
deleted file mode 100644
index 10e806ec9120b9a321904b4b461bd8da1468dd0c..0000000000000000000000000000000000000000
--- a/Documentation/ProjectFile/prj/processes/process/LIQUID_FLOW/material_property/solid/i_solid.md
+++ /dev/null
@@ -1 +0,0 @@
-Property of solid phase of liquid flow process.
diff --git a/Documentation/ProjectFile/prj/processes/process/LIQUID_FLOW/material_property/solid/t_thermal_expansion.md b/Documentation/ProjectFile/prj/processes/process/LIQUID_FLOW/material_property/solid/t_thermal_expansion.md
deleted file mode 100644
index 984c939c457d5a62efcddc947cc69541f9354d42..0000000000000000000000000000000000000000
--- a/Documentation/ProjectFile/prj/processes/process/LIQUID_FLOW/material_property/solid/t_thermal_expansion.md
+++ /dev/null
@@ -1 +0,0 @@
-Liquid thermal expansion.
diff --git a/Documentation/ProjectFile/prj/processes/process/PHASE_FIELD/t_coupling_scheme.md b/Documentation/ProjectFile/prj/processes/process/PHASE_FIELD/t_coupling_scheme.md
new file mode 100644
index 0000000000000000000000000000000000000000..70f06ad843096880156957d752ca5a213c6d4929
--- /dev/null
+++ b/Documentation/ProjectFile/prj/processes/process/PHASE_FIELD/t_coupling_scheme.md
@@ -0,0 +1,2 @@
+An optional input to select the coupling scheme. So far, only the monolithic
+scheme is available for PHASE_FIELD, and this input can be omitted.
diff --git a/Documentation/ProjectFile/prj/processes/process/RichardsComponentTransport/t_coupling_scheme.md b/Documentation/ProjectFile/prj/processes/process/RichardsComponentTransport/t_coupling_scheme.md
new file mode 100644
index 0000000000000000000000000000000000000000..4ad60d8ae0dd65644dfd380df7d8084c37586dd5
--- /dev/null
+++ b/Documentation/ProjectFile/prj/processes/process/RichardsComponentTransport/t_coupling_scheme.md
@@ -0,0 +1,3 @@
+An optional input to select the coupling scheme. So far, only the monolithic
+scheme is available for RichardsComponentTransport, and this input can be
+omitted.
diff --git a/Documentation/ProjectFile/prj/processes/process/THERMO_MECHANICS/t_coupling_scheme.md b/Documentation/ProjectFile/prj/processes/process/THERMO_MECHANICS/t_coupling_scheme.md
new file mode 100644
index 0000000000000000000000000000000000000000..fb1c11d02f545a428b6dafbf9fbf5393d78ad1a6
--- /dev/null
+++ b/Documentation/ProjectFile/prj/processes/process/THERMO_MECHANICS/t_coupling_scheme.md
@@ -0,0 +1,2 @@
+An optional input to select the coupling scheme. So far, only the monolithic
+scheme is available for THERMO_MECHANICS, and this input can be omitted.
diff --git a/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/i_coupled_processes.md b/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/i_coupled_processes.md
deleted file mode 100644
index 6550f6798a1430360cf2cdd0c1a50dad586cba28..0000000000000000000000000000000000000000
--- a/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/i_coupled_processes.md
+++ /dev/null
@@ -1 +0,0 @@
-Defines the coupled processes to a process.
\ No newline at end of file
diff --git a/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/t_coupled_process.md b/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/t_coupled_process.md
deleted file mode 100644
index 98b97648e209f0af17fb536111f40f2e6b90adc2..0000000000000000000000000000000000000000
--- a/Documentation/ProjectFile/prj/time_loop/processes/process/coupled_processes/t_coupled_process.md
+++ /dev/null
@@ -1 +0,0 @@
-Specifies a single coupled process to a process by name.
\ No newline at end of file
diff --git a/ProcessLib/AbstractJacobianAssembler.h b/ProcessLib/AbstractJacobianAssembler.h
index 50dde014d8a95ae0baac4bbdcfefd5b9c9fb1fd8..8645a26a9527f6f118ce237604c1069605f3475c 100644
--- a/ProcessLib/AbstractJacobianAssembler.h
+++ b/ProcessLib/AbstractJacobianAssembler.h
@@ -34,7 +34,6 @@ public:
     //! \f$b\f$ with coupling.
     virtual void assembleWithJacobianAndCoupling(
         LocalAssemblerInterface& /*local_assembler*/, double const /*t*/,
-        std::vector<double> const& /*local_x*/,
         std::vector<double> const& /*local_xdot*/, const double /*dxdot_dx*/,
         const double /*dx_dx*/, std::vector<double>& /*local_M_data*/,
         std::vector<double>& /*local_K_data*/,
diff --git a/ProcessLib/ComponentTransport/ComponentTransportProcess.cpp b/ProcessLib/ComponentTransport/ComponentTransportProcess.cpp
index bf446dacc2712176765335525a566d72b611d420..eb63cd1d42af3fe84ca4f70fe63a92fca512c481 100644
--- a/ProcessLib/ComponentTransport/ComponentTransportProcess.cpp
+++ b/ProcessLib/ComponentTransport/ComponentTransportProcess.cpp
@@ -22,13 +22,16 @@ ComponentTransportProcess::ComponentTransportProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     ComponentTransportProcessData&& process_data,
     SecondaryVariableCollection&& secondary_variables,
-    NumLib::NamedFunctionCaller&& named_function_caller)
+    NumLib::NamedFunctionCaller&& named_function_caller,
+    bool const use_monolithic_scheme)
     : Process(mesh, std::move(jacobian_assembler), parameters,
               integration_order, std::move(process_variables),
-              std::move(secondary_variables), std::move(named_function_caller)),
+              std::move(secondary_variables), std::move(named_function_caller),
+              use_monolithic_scheme),
       _process_data(std::move(process_data))
 {
 }
@@ -59,6 +62,8 @@ void ComponentTransportProcess::assembleConcreteProcess(
     GlobalVector& b)
 {
     DBUG("Assemble ComponentTransportProcess.");
+    if (!_use_monolithic_scheme)
+        setCoupledSolutionsOfPreviousTimeStep();
 
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
@@ -73,6 +78,9 @@ void ComponentTransportProcess::assembleWithJacobianConcreteProcess(
 {
     DBUG("AssembleWithJacobian ComponentTransportProcess.");
 
+    if (!_use_monolithic_scheme)
+        setCoupledSolutionsOfPreviousTimeStep();
+
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
diff --git a/ProcessLib/ComponentTransport/ComponentTransportProcess.h b/ProcessLib/ComponentTransport/ComponentTransportProcess.h
index 20f7dc87c0b1b6f74bb2c9c06ff01f9cd36d08a4..621fbfc97897c0fbfbe4c47237fed19adc8a9dc6 100644
--- a/ProcessLib/ComponentTransport/ComponentTransportProcess.h
+++ b/ProcessLib/ComponentTransport/ComponentTransportProcess.h
@@ -83,11 +83,12 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         ComponentTransportProcessData&& process_data,
         SecondaryVariableCollection&& secondary_variables,
-        NumLib::NamedFunctionCaller&& named_function_caller);
+        NumLib::NamedFunctionCaller&& named_function_caller,
+        bool const use_monolithic_scheme);
 
     //! \name ODESystem interface
     //! @{
diff --git a/ProcessLib/ComponentTransport/CreateComponentTransportProcess.cpp b/ProcessLib/ComponentTransport/CreateComponentTransportProcess.cpp
index dbcdbd75a9314bafa91f1cfb7047b5efc6854d3c..9b3eeeafabe18b3be9ed66b3c8b3e86a8098b9ba 100644
--- a/ProcessLib/ComponentTransport/CreateComponentTransportProcess.cpp
+++ b/ProcessLib/ComponentTransport/CreateComponentTransportProcess.cpp
@@ -35,19 +35,43 @@ std::unique_ptr<Process> createComponentTransportProcess(
     config.checkConfigParameter("type", "ComponentTransport");
 
     DBUG("Create ComponentTransportProcess.");
+    auto const staggered_scheme =
+        //! \ogs_file_param{prj__processes__process__coupling_scheme}
+        config.getConfigParameterOptional<std::string>("coupling_scheme");
+    const bool use_monolithic_scheme =
+        (staggered_scheme && (*staggered_scheme == "staggered")) ? false : true;
 
     // Process variable.
 
     //! \ogs_file_param{prj__processes__process__ComponentTransport__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
-        variables, pv_config,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    if (use_monolithic_scheme)  // monolithic scheme.
+    {
+        auto per_process_variables = findProcessVariables(
+            variables, pv_config,
+            {
+            //! \ogs_file_param_special{prj__processes__process__ComponentTransport__process_variables__concentration}
+             "concentration",
+             //! \ogs_file_param_special{prj__processes__process__ComponentTransport__process_variables__pressure}
+             "pressure"});
+        process_variables.push_back(std::move(per_process_variables));
+    }
+    else  // staggered scheme.
+    {
+        std::array<std::string, 2> variable_names = {
+            {"concentration",
+             "pressure"}};  // double-braces required in C++11 (not in C++14)
+
+        for (int i = 0; i < 2; i++)
         {
-        //! \ogs_file_param_special{prj__processes__process__ComponentTransport__process_variables__concentration}
-        "concentration",
-        //! \ogs_file_param_special{prj__processes__process__ComponentTransport__process_variables__pressure}
-        "pressure"});
+            auto per_process_variables =
+                findProcessVariables(variables, pv_config, {variable_names[i]});
+            process_variables.push_back(std::move(per_process_variables));
+        }
+    }
 
     MaterialLib::PorousMedium::PorousMediaProperties porous_media_properties{
         MaterialLib::PorousMedium::createPorousMediaProperties(
@@ -144,7 +168,8 @@ std::unique_ptr<Process> createComponentTransportProcess(
     return std::make_unique<ComponentTransportProcess>(
         mesh, std::move(jacobian_assembler), parameters, integration_order,
         std::move(process_variables), std::move(process_data),
-        std::move(secondary_variables), std::move(named_function_caller));
+        std::move(secondary_variables), std::move(named_function_caller),
+        use_monolithic_scheme);
 }
 
 }  // namespace ComponentTransport
diff --git a/ProcessLib/CoupledSolutionsForStaggeredScheme.cpp b/ProcessLib/CoupledSolutionsForStaggeredScheme.cpp
index 1f7eecb535dea26e6bec7a14dfbd51c0f6ae8a15..71766d222cad031446174bc138ce0e8bc3829ace 100644
--- a/ProcessLib/CoupledSolutionsForStaggeredScheme.cpp
+++ b/ProcessLib/CoupledSolutionsForStaggeredScheme.cpp
@@ -18,48 +18,44 @@
 namespace ProcessLib
 {
 CoupledSolutionsForStaggeredScheme::CoupledSolutionsForStaggeredScheme(
-    std::unordered_map<std::type_index, Process const&> const&
-        coupled_processes_,
-    std::unordered_map<std::type_index, GlobalVector const&> const& coupled_xs_,
-    const double dt_)
-    : coupled_processes(coupled_processes_), coupled_xs(coupled_xs_), dt(dt_)
+    std::vector<std::reference_wrapper<GlobalVector const>> const& coupled_xs_,
+    const double dt_, const int process_id_)
+    : coupled_xs(coupled_xs_), dt(dt_), process_id(process_id_)
 {
-    for (auto const& coupled_x_pair : coupled_xs)
+    for (auto const& coupled_x : coupled_xs)
     {
-        auto const& coupled_x = coupled_x_pair.second;
-        MathLib::LinAlg::setLocalAccessibleVector(coupled_x);
+        MathLib::LinAlg::setLocalAccessibleVector(coupled_x.get());
     }
+}
+
+std::vector<std::vector<double>> getPreviousLocalSolutions(
+    const CoupledSolutionsForStaggeredScheme& cpl_xs,
+    const std::vector<GlobalIndexType>& indices)
+{
+    const auto number_of_coupled_solutions = cpl_xs.coupled_xs.size();
+    std::vector<std::vector<double>> local_xs_t0;
+    local_xs_t0.reserve(number_of_coupled_solutions);
 
-    for (auto const& coupled_process_pair : coupled_processes)
+    for (auto const& x_t0 : cpl_xs.coupled_xs_t0)
     {
-        auto const& coupled_pcs = coupled_process_pair.second;
-        auto const prevous_time_x = coupled_pcs.getPreviousTimeStepSolution();
-        if (prevous_time_x)
-        {
-            MathLib::LinAlg::setLocalAccessibleVector(*prevous_time_x);
-        }
+        local_xs_t0.emplace_back(x_t0->get(indices));
     }
+    return local_xs_t0;
 }
 
-std::unordered_map<std::type_index, const std::vector<double>>
-getCurrentLocalSolutionsOfCoupledProcesses(
-    const std::unordered_map<std::type_index, GlobalVector const&>&
-        global_coupled_xs,
+std::vector<std::vector<double>> getCurrentLocalSolutions(
+    const CoupledSolutionsForStaggeredScheme& cpl_xs,
     const std::vector<GlobalIndexType>& indices)
 {
-    std::unordered_map<std::type_index, const std::vector<double>>
-        local_coupled_xs;
+    const auto number_of_coupled_solutions = cpl_xs.coupled_xs.size();
+    std::vector<std::vector<double>> local_xs_t1;
+    local_xs_t1.reserve(number_of_coupled_solutions);
 
-    // Get local nodal solutions of the coupled equations.
-    for (auto const& global_coupled_x_pair : global_coupled_xs)
+    for (auto const& x_t1 : cpl_xs.coupled_xs)
     {
-        auto const& coupled_x = global_coupled_x_pair.second;
-        auto const local_coupled_x = coupled_x.get(indices);
-        BaseLib::insertIfTypeIndexKeyUniqueElseError(
-            local_coupled_xs, global_coupled_x_pair.first, local_coupled_x,
-            "local_coupled_x");
+        local_xs_t1.emplace_back(x_t1.get().get(indices));
     }
-    return local_coupled_xs;
+    return local_xs_t1;
 }
 
-}  // end of ProcessLib
+}  // namespace ProcessLib
diff --git a/ProcessLib/CoupledSolutionsForStaggeredScheme.h b/ProcessLib/CoupledSolutionsForStaggeredScheme.h
index c2880de217d49747b2ed7525f1cc18a905a15259..9a940d1ebc7f64a3093de0d83362eac7f7770de2 100644
--- a/ProcessLib/CoupledSolutionsForStaggeredScheme.h
+++ b/ProcessLib/CoupledSolutionsForStaggeredScheme.h
@@ -12,18 +12,17 @@
 
 #pragma once
 
-#include <unordered_map>
-#include <typeindex>
+#include <functional>
+#include <utility>
+#include <vector>
 
 #include "MathLib/LinAlg/GlobalMatrixVectorTypes.h"
 
 namespace ProcessLib
 {
-class Process;
-
 /**
- *  A struct to keep the references of the coupled processes and the references
- *  of the current solutions of the equations of the coupled processes.
+ *  A struct to keep the references of the current solutions of the equations of
+ *  the coupled processes.
  *
  *  During staggered coupling iteration, an instance of this struct is created
  *  and passed through interfaces to global and local assemblers for each
@@ -32,68 +31,54 @@ class Process;
 struct CoupledSolutionsForStaggeredScheme
 {
     CoupledSolutionsForStaggeredScheme(
-        std::unordered_map<std::type_index, Process const&> const&
-            coupled_processes_,
-        std::unordered_map<std::type_index, GlobalVector const&> const&
+        std::vector<std::reference_wrapper<GlobalVector const>> const&
             coupled_xs_,
-        const double dt_);
-
-    /// References to the coupled processes are distinguished by the keys of
-    /// process types.
-    std::unordered_map<std::type_index, Process const&> const&
-        coupled_processes;
+        const double dt_, const int process_id_);
 
     /// References to the current solutions of the coupled processes.
-    /// The coupled solutions are distinguished by the keys of process types.
-    std::unordered_map<std::type_index, GlobalVector const&> const& coupled_xs;
+    std::vector<std::reference_wrapper<GlobalVector const>> const& coupled_xs;
+
+    /// Pointers to the vector of the solutions of the previous time step.
+    std::vector<GlobalVector*> coupled_xs_t0;
 
     const double dt;  ///< Time step size.
+    const int process_id;
 };
 
 /**
- *  A struct to keep the references of the coupled processes, and local element
- *  solutions  of the current and previous time step solutions of the equations
- *  of the coupled processes.
+ *  A struct to keep the references to the local element solutions  of the
+ *  current and previous time step solutions of the equations of the coupled
+ *  processes.
  *
  *  During the global assembly loop, an instance of this struct is created for
  *  each element and it is then passed to local assemblers.
  */
 struct LocalCoupledSolutions
 {
-    LocalCoupledSolutions(
-        const double dt_,
-        std::unordered_map<std::type_index, Process const&> const&
-            coupled_processes_,
-        std::unordered_map<std::type_index, const std::vector<double>>&&
-            local_coupled_xs0_,
-        std::unordered_map<std::type_index, const std::vector<double>>&&
-            local_coupled_xs_)
+    LocalCoupledSolutions(const double dt_, const int process_id_,
+                          std::vector<std::vector<double>>&& local_coupled_xs0_,
+                          std::vector<std::vector<double>>&& local_coupled_xs_)
         : dt(dt_),
-          coupled_processes(coupled_processes_),
+          process_id(process_id_),
           local_coupled_xs0(std::move(local_coupled_xs0_)),
           local_coupled_xs(std::move(local_coupled_xs_))
     {
     }
 
     const double dt;  ///< Time step size.
-
-    /// References to the coupled processes are distinguished by the keys of
-    /// process types.
-    std::unordered_map<std::type_index, Process const&> const&
-        coupled_processes;
+    const int process_id;
 
     /// Local solutions of the previous time step.
-    std::unordered_map<std::type_index, const std::vector<double>> const
-        local_coupled_xs0;
+    std::vector<std::vector<double>> const local_coupled_xs0;
     /// Local solutions of the current time step.
-    std::unordered_map<std::type_index, const std::vector<double>> const
-        local_coupled_xs;
+    std::vector<std::vector<double>> const local_coupled_xs;
 };
 
-std::unordered_map<std::type_index, const std::vector<double>>
-getCurrentLocalSolutionsOfCoupledProcesses(
-    const std::unordered_map<std::type_index, GlobalVector const&>&
-        global_coupled_xs,
+std::vector<std::vector<double>> getPreviousLocalSolutions(
+    const CoupledSolutionsForStaggeredScheme& cpl_xs,
     const std::vector<GlobalIndexType>& indices);
 
+std::vector<std::vector<double>> getCurrentLocalSolutions(
+    const CoupledSolutionsForStaggeredScheme& cpl_xs,
+    const std::vector<GlobalIndexType>& indices);
 }  // end of ProcessLib
diff --git a/ProcessLib/GroundwaterFlow/CreateGroundwaterFlowProcess.cpp b/ProcessLib/GroundwaterFlow/CreateGroundwaterFlowProcess.cpp
index da6ab5ccd85cf43fff45a7ee88a6454935541110..f49458fcd0fcdca11c884d79c1be90a52a8cadd2 100644
--- a/ProcessLib/GroundwaterFlow/CreateGroundwaterFlowProcess.cpp
+++ b/ProcessLib/GroundwaterFlow/CreateGroundwaterFlowProcess.cpp
@@ -42,10 +42,13 @@ std::unique_ptr<Process> createGroundwaterFlowProcess(
     //! \ogs_file_param{prj__processes__process__GROUNDWATER_FLOW__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    auto per_process_variables = findProcessVariables(
         variables, pv_config,
         {//! \ogs_file_param_special{prj__processes__process__GROUNDWATER_FLOW__process_variables__process_variable}
          "process_variable"});
+    process_variables.push_back(std::move(per_process_variables));
 
     // Hydraulic conductivity parameter.
     auto& hydraulic_conductivity = findParameter<double>(
diff --git a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.cpp b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.cpp
index 68d90027557df7a5fcf1cc383236f7df361cddd4..90c27d71016c381c329297cb0428fdcae09895ca 100644
--- a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.cpp
+++ b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.cpp
@@ -22,7 +22,8 @@ GroundwaterFlowProcess::GroundwaterFlowProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     GroundwaterFlowProcessData&& process_data,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller,
diff --git a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h
index dad280e941880b27694d9debec5e15c105632440..b63029f0bf87dbf63c6429e0c1d0fe536a3ac82d 100644
--- a/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h
+++ b/ProcessLib/GroundwaterFlow/GroundwaterFlowProcess.h
@@ -33,7 +33,7 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         GroundwaterFlowProcessData&& process_data,
         SecondaryVariableCollection&& secondary_variables,
diff --git a/ProcessLib/HT/CreateHTProcess.cpp b/ProcessLib/HT/CreateHTProcess.cpp
index 5a0302421a2d2e3c7560c37dff0e8dc3c96908d4..cd48556153cf9af9f78cb67c3ba4b36c2e9038b9 100644
--- a/ProcessLib/HT/CreateHTProcess.cpp
+++ b/ProcessLib/HT/CreateHTProcess.cpp
@@ -18,6 +18,7 @@
 
 #include "HTProcess.h"
 #include "HTMaterialProperties.h"
+#include "HTLocalAssemblerInterface.h"
 
 namespace ProcessLib
 {
@@ -36,17 +37,39 @@ std::unique_ptr<Process> createHTProcess(
 
     DBUG("Create HTProcess.");
 
+    auto const staggered_scheme =
+        //! \ogs_file_param{prj__processes__process__HT__coupling_scheme}
+        config.getConfigParameterOptional<std::string>("coupling_scheme");
+    const bool use_monolithic_scheme =
+        !(staggered_scheme && (*staggered_scheme == "staggered"));
+
     // Process variable.
 
     //! \ogs_file_param{prj__processes__process__HT__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
-        variables, pv_config,
-        {//! \ogs_file_param_special{prj__processes__process__HT__process_variables__temperature}
-         "temperature",
-         //! \ogs_file_param_special{prj__processes__process__HT__process_variables__pressure}
-         "pressure"});
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    if (use_monolithic_scheme)  // monolithic scheme.
+    {
+        auto per_process_variables = findProcessVariables(
+            variables, pv_config,
+            {//! \ogs_file_param_special{prj__processes__process__HT__process_variables__temperature}
+             "temperature",
+             //! \ogs_file_param_special{prj__processes__process__HT__process_variables__pressure}
+             "pressure"});
+        process_variables.push_back(std::move(per_process_variables));
+    }
+    else  // staggered scheme.
+    {
+        using namespace std::string_literals;
+        for (auto const& variable_name : {"temperature"s, "pressure"s})
+        {
+            auto per_process_variables =
+                findProcessVariables(variables, pv_config, {variable_name});
+            process_variables.push_back(std::move(per_process_variables));
+        }
+    }
 
     MaterialLib::PorousMedium::PorousMediaProperties porous_media_properties{
         MaterialLib::PorousMedium::createPorousMediaProperties(mesh, config,
@@ -57,14 +80,6 @@ std::unique_ptr<Process> createHTProcess(
     auto fluid_properties =
         MaterialLib::Fluid::createFluidProperties(fluid_config);
 
-    // Parameter for the density of the fluid.
-    auto& fluid_reference_density = findParameter<double>(
-        config,
-        //! \ogs_file_param_special{prj__processes__process__HT__fluid_reference_density}
-        "fluid_reference_density", parameters, 1);
-    DBUG("Use \'%s\' as fluid_reference_density parameter.",
-         fluid_reference_density.name.c_str());
-
     // Parameter for the density of the solid.
     auto& density_solid = findParameter<double>(
         config,
@@ -82,21 +97,40 @@ std::unique_ptr<Process> createHTProcess(
 
     // Parameter for the thermal conductivity of the solid (only one scalar per
     // element, i.e., the isotropic case is handled at the moment)
-    auto& thermal_dispersivity_longitudinal = findParameter<double>(
-        config,
-        //! \ogs_file_param_special{prj__processes__process__HT__thermal_dispersivity_longitudinal}
-        "thermal_dispersivity_longitudinal", parameters, 1);
-    DBUG("Use \'%s\' as thermal_dispersivity_longitudinal parameter.",
-         thermal_dispersivity_longitudinal.name.c_str());
 
-    // Parameter for the thermal conductivity of the solid (only one scalar per
-    // element, i.e., the isotropic case is handled at the moment)
-    auto& thermal_dispersivity_transversal = findParameter<double>(
-        config,
-        //! \ogs_file_param_special{prj__processes__process__HT__thermal_dispersivity_transversal}
-        "thermal_dispersivity_transversal", parameters, 1);
-    DBUG("Use \'%s\' as thermal_dispersivity_transversal parameter.",
-         thermal_dispersivity_transversal.name.c_str());
+    ConstantParameter<double> default_thermal_dispersivity_longitudinal(
+        "default thermal dispersivity longitudinal", 0.);
+    ConstantParameter<double> default_thermal_dispersivity_transversal(
+        "default thermal dispersivity transversal", 0.);
+
+    Parameter<double>* thermal_dispersivity_longitudinal =
+        &default_thermal_dispersivity_longitudinal;
+    Parameter<double>* thermal_dispersivity_transversal =
+        &default_thermal_dispersivity_transversal;
+    auto const dispersion_config =
+        //! \ogs_file_param{prj__processes__process__HT__thermal_dispersivity}
+        config.getConfigSubtreeOptional("thermal_dispersivity");
+    bool const has_fluid_thermal_dispersivity =
+        static_cast<bool>(dispersion_config);
+    if (dispersion_config)
+    {
+        thermal_dispersivity_longitudinal = &findParameter<double>(
+            *dispersion_config,
+            //! \ogs_file_param_special{prj__processes__process__HT__thermal_dispersivity__longitudinal}
+            "longitudinal", parameters, 1);
+        DBUG("Use \'%s\' as thermal_dispersivity_longitudinal parameter.",
+             thermal_dispersivity_longitudinal->name.c_str());
+
+        // Parameter for the thermal conductivity of the solid (only one scalar
+        // per
+        // element, i.e., the isotropic case is handled at the moment)
+        thermal_dispersivity_transversal = &findParameter<double>(
+            *dispersion_config,
+            //! \ogs_file_param_special{prj__processes__process__HT__thermal_dispersivity__transversal}
+            "transversal", parameters, 1);
+        DBUG("Use \'%s\' as thermal_dispersivity_transversal parameter.",
+             thermal_dispersivity_transversal->name.c_str());
+    }
 
     // Parameter for the thermal conductivity of the solid (only one scalar per
     // element, i.e., the isotropic case is handled at the moment)
@@ -120,12 +154,14 @@ std::unique_ptr<Process> createHTProcess(
     std::vector<double> const b =
         //! \ogs_file_param{prj__processes__process__HT__specific_body_force}
         config.getConfigParameter<std::vector<double>>("specific_body_force");
-    assert(b.size() > 0 && b.size() < 4);
+    assert(!b.empty() && b.size() < 4);
     if (b.size() < mesh.getDimension())
+    {
         OGS_FATAL(
             "specific body force (gravity vector) has %d components, mesh "
             "dimension is %d",
             b.size(), mesh.getDimension());
+    }
     bool const has_gravity = MathLib::toVector(b).norm() > 0;
     if (has_gravity)
     {
@@ -133,17 +169,45 @@ std::unique_ptr<Process> createHTProcess(
         std::copy_n(b.data(), b.size(), specific_body_force.data());
     }
 
+    ConstantParameter<double> default_solid_thermal_expansion(
+        "default solid thermal expansion", 0.);
+    ConstantParameter<double> default_biot_constant("default_biot constant",
+                                                    0.);
+    Parameter<double>* solid_thermal_expansion =
+        &default_solid_thermal_expansion;
+    Parameter<double>* biot_constant = &default_biot_constant;
+
+    auto const solid_config =
+        //! \ogs_file_param{prj__processes__process__HT__solid_thermal_expansion}
+        config.getConfigSubtreeOptional("solid_thermal_expansion");
+    const bool has_fluid_thermal_expansion = static_cast<bool>(solid_config);
+    if (solid_config)
+    {
+        solid_thermal_expansion = &findParameter<double>(
+            //! \ogs_file_param_special{prj__processes__process__HT__solid_thermal_expansion__thermal_expansion}
+            *solid_config, "thermal_expansion", parameters, 1);
+        DBUG("Use \'%s\' as solid thermal expansion.",
+             solid_thermal_expansion->name.c_str());
+        biot_constant = &findParameter<double>(
+            //! \ogs_file_param_special{prj__processes__process__HT__solid_thermal_expansion__biot_constant}
+            *solid_config, "biot_constant", parameters, 1);
+        DBUG("Use \'%s\' as Biot's constant.", biot_constant->name.c_str());
+    }
+
     std::unique_ptr<HTMaterialProperties> material_properties =
         std::make_unique<HTMaterialProperties>(
             std::move(porous_media_properties),
             density_solid,
-            fluid_reference_density,
             std::move(fluid_properties),
-            thermal_dispersivity_longitudinal,
-            thermal_dispersivity_transversal,
+            has_fluid_thermal_dispersivity,
+            *thermal_dispersivity_longitudinal,
+            *thermal_dispersivity_transversal,
             specific_heat_capacity_solid,
             thermal_conductivity_solid,
             thermal_conductivity_fluid,
+            has_fluid_thermal_expansion,
+            *solid_thermal_expansion,
+            *biot_constant,
             specific_body_force,
             has_gravity);
 
@@ -158,7 +222,8 @@ std::unique_ptr<Process> createHTProcess(
     return std::make_unique<HTProcess>(
         mesh, std::move(jacobian_assembler), parameters, integration_order,
         std::move(process_variables), std::move(material_properties),
-        std::move(secondary_variables), std::move(named_function_caller));
+        std::move(secondary_variables), std::move(named_function_caller),
+        use_monolithic_scheme);
 }
 
 }  // namespace HT
diff --git a/ProcessLib/HT/HTFEM.h b/ProcessLib/HT/HTFEM.h
index 0cfc29612caca7c9c6939325d56e07a3e57c8c60..626e71b4f1fd41a62d8dd64975cbe11f2513c219 100644
--- a/ProcessLib/HT/HTFEM.h
+++ b/ProcessLib/HT/HTFEM.h
@@ -12,68 +12,28 @@
 #include <Eigen/Dense>
 #include <vector>
 
-
 #include "HTMaterialProperties.h"
-#include "MathLib/LinAlg/Eigen/EigenMapTools.h"
+
 #include "NumLib/DOF/DOFTableUtil.h"
 #include "NumLib/Extrapolation/ExtrapolatableElement.h"
 #include "NumLib/Fem/FiniteElement/TemplateIsoparametric.h"
 #include "NumLib/Fem/ShapeMatrixPolicy.h"
-#include "NumLib/Function/Interpolation.h"
-#include "ProcessLib/LocalAssemblerInterface.h"
 #include "ProcessLib/Parameter/Parameter.h"
 #include "ProcessLib/Utils/InitShapeMatrices.h"
 
+#include "HTLocalAssemblerInterface.h"
+
 namespace ProcessLib
 {
 namespace HT
 {
-template < typename NodalRowVectorType, typename GlobalDimNodalMatrixType>
-struct IntegrationPointData final
-{
-    IntegrationPointData(NodalRowVectorType N_,
-                         GlobalDimNodalMatrixType dNdx_,
-                         double const& integration_weight_)
-        : N(std::move(N_)),
-          dNdx(std::move(dNdx_)),
-          integration_weight(integration_weight_)
-    {}
-
-    NodalRowVectorType const N;
-    GlobalDimNodalMatrixType const dNdx;
-    double const integration_weight;
-
-    EIGEN_MAKE_ALIGNED_OPERATOR_NEW;
-};
-
-const unsigned NUM_NODAL_DOF = 2;
-
-class HTLocalAssemblerInterface
-    : public ProcessLib::LocalAssemblerInterface,
-      public NumLib::ExtrapolatableElement
-{
-public:
-    virtual std::vector<double> const& getIntPtDarcyVelocity(
-        const double /*t*/,
-        GlobalVector const& /*current_solution*/,
-        NumLib::LocalToGlobalIndexMap const& /*dof_table*/,
-        std::vector<double>& /*cache*/) const = 0;
-};
-
 template <typename ShapeFunction, typename IntegrationMethod,
           unsigned GlobalDim>
-class LocalAssemblerData : public HTLocalAssemblerInterface
+class HTFEM : public HTLocalAssemblerInterface
 {
     using ShapeMatricesType = ShapeMatrixPolicyType<ShapeFunction, GlobalDim>;
     using ShapeMatrices = typename ShapeMatricesType::ShapeMatrices;
 
-    using LocalMatrixType = typename ShapeMatricesType::template MatrixType<
-        NUM_NODAL_DOF * ShapeFunction::NPOINTS,
-        NUM_NODAL_DOF * ShapeFunction::NPOINTS>;
-    using LocalVectorType =
-        typename ShapeMatricesType::template VectorType<NUM_NODAL_DOF *
-                                                        ShapeFunction::NPOINTS>;
-
     using NodalVectorType = typename ShapeMatricesType::NodalVectorType;
     using NodalRowVectorType = typename ShapeMatricesType::NodalRowVectorType;
 
@@ -83,19 +43,22 @@ class LocalAssemblerData : public HTLocalAssemblerInterface
     using GlobalDimMatrixType = typename ShapeMatricesType::GlobalDimMatrixType;
 
 public:
-    LocalAssemblerData(MeshLib::Element const& element,
-                       std::size_t const local_matrix_size,
-                       bool is_axially_symmetric,
-                       unsigned const integration_order,
-                       HTMaterialProperties const& process_data)
-        : _element(element),
-          _process_data(process_data),
+    HTFEM(MeshLib::Element const& element,
+          std::size_t const local_matrix_size,
+          bool is_axially_symmetric,
+          unsigned const integration_order,
+          HTMaterialProperties const& material_properties,
+          const unsigned dof_per_node)
+        : HTLocalAssemblerInterface(),
+          _element(element),
+          _material_properties(material_properties),
           _integration_method(integration_order)
     {
         // This assertion is valid only if all nodal d.o.f. use the same shape
         // matrices.
-        assert(local_matrix_size == ShapeFunction::NPOINTS * NUM_NODAL_DOF);
+        assert(local_matrix_size == ShapeFunction::NPOINTS * dof_per_node);
         (void)local_matrix_size;
+        (void)dof_per_node;
 
         unsigned const n_integration_points =
             _integration_method.getNumberOfPoints();
@@ -116,178 +79,79 @@ public:
         }
     }
 
-    void assemble(double const t, std::vector<double> const& local_x,
-                  std::vector<double>& local_M_data,
-                  std::vector<double>& local_K_data,
-                  std::vector<double>& local_b_data) override
+    Eigen::Map<const Eigen::RowVectorXd> getShapeMatrix(
+        const unsigned integration_point) const override
     {
-        auto const local_matrix_size = local_x.size();
-        // This assertion is valid only if all nodal d.o.f. use the same shape
-        // matrices.
-        assert(local_matrix_size == ShapeFunction::NPOINTS * NUM_NODAL_DOF);
-
-        auto local_M = MathLib::createZeroedMatrix<LocalMatrixType>(
-            local_M_data, local_matrix_size, local_matrix_size);
-        auto local_K = MathLib::createZeroedMatrix<LocalMatrixType>(
-            local_K_data, local_matrix_size, local_matrix_size);
-        auto local_b = MathLib::createZeroedVector<LocalVectorType>(
-            local_b_data, local_matrix_size);
-
-        auto const num_nodes = ShapeFunction::NPOINTS;
-
-        auto Ktt = local_K.template block<num_nodes, num_nodes>(0, 0);
-        auto Mtt = local_M.template block<num_nodes, num_nodes>(0, 0);
-        auto Kpp =
-            local_K.template block<num_nodes, num_nodes>(num_nodes, num_nodes);
-        auto Mpp =
-            local_M.template block<num_nodes, num_nodes>(num_nodes, num_nodes);
-        auto Bp = local_b.template block<num_nodes, 1>(num_nodes, 0);
-
-        SpatialPosition pos;
-        pos.setElementID(_element.getID());
-
-        auto p_nodal_values =
-            Eigen::Map<const NodalVectorType>(&local_x[num_nodes], num_nodes);
-
-        auto const& b = _process_data.specific_body_force;
-
-        GlobalDimMatrixType const& I(
-            GlobalDimMatrixType::Identity(GlobalDim, GlobalDim));
-
-        MaterialLib::Fluid::FluidProperty::ArrayType vars;
-
-        unsigned const n_integration_points =
-            _integration_method.getNumberOfPoints();
-
-        for (std::size_t ip(0); ip < n_integration_points; ip++)
-        {
-            pos.setIntegrationPoint(ip);
-
-            auto const fluid_reference_density =
-                _process_data.fluid_reference_density(t, pos)[0];
-
-            auto const density_solid = _process_data.density_solid(t, pos)[0];
-            // \todo the argument to getValue() has to be changed for non
-            // constant storage model
-            auto const specific_storage =
-                _process_data.porous_media_properties.getSpecificStorage(t, pos)
-                    .getValue(0.0);
-
-            auto const thermal_conductivity_solid =
-                _process_data.thermal_conductivity_solid(t, pos)[0];
-            auto const thermal_conductivity_fluid =
-                _process_data.thermal_conductivity_fluid(t, pos)[0];
-
-            auto const& ip_data = _ip_data[ip];
-            auto const& N = ip_data.N;
-            auto const& dNdx = ip_data.dNdx;
-            auto const& w = ip_data.integration_weight;
-
-            double T_int_pt = 0.0;
-            double p_int_pt = 0.0;
-            // Order matters: First T, then P!
-            NumLib::shapeFunctionInterpolate(local_x, N, T_int_pt, p_int_pt);
-
-            // \todo the first argument has to be changed for non constant
-            // porosity model
-            auto const porosity =
-                _process_data.porous_media_properties.getPorosity(t, pos)
-                    .getValue(t, pos, 0.0, T_int_pt);
-
-            auto const intrinsic_permeability =
-                _process_data.porous_media_properties.getIntrinsicPermeability(
-                    t, pos).getValue(t, pos, 0.0, T_int_pt);
+        auto const& N = _ip_data[integration_point].N;
 
-            double const thermal_conductivity =
-                thermal_conductivity_solid * (1 - porosity) +
-                 thermal_conductivity_fluid * porosity;
+        // assumes N is stored contiguously in memory
+        return Eigen::Map<const Eigen::RowVectorXd>(N.data(), N.size());
+    }
 
-            auto const specific_heat_capacity_solid =
-                _process_data.specific_heat_capacity_solid(t, pos)[0];
+protected:
+    MeshLib::Element const& _element;
+    HTMaterialProperties const& _material_properties;
 
-            vars[static_cast<int>(
-                MaterialLib::Fluid::PropertyVariableType::T)] = T_int_pt;
-            vars[static_cast<int>(
-                MaterialLib::Fluid::PropertyVariableType::p)] = p_int_pt;
-            auto const specific_heat_capacity_fluid =
-                _process_data.fluid_properties->getValue(
-                    MaterialLib::Fluid::FluidPropertyType::HeatCapacity, vars);
+    IntegrationMethod const _integration_method;
+    std::vector<
+        IntegrationPointData<NodalRowVectorType, GlobalDimNodalMatrixType>,
+        Eigen::aligned_allocator<
+            IntegrationPointData<NodalRowVectorType, GlobalDimNodalMatrixType>>>
+        _ip_data;
 
-            auto const thermal_dispersivity_longitudinal =
-                _process_data.thermal_dispersivity_longitudinal(t, pos)[0];
-            auto const thermal_dispersivity_transversal =
-                _process_data.thermal_dispersivity_transversal(t, pos)[0];
+    double getHeatEnergyCoefficient(const double t, const SpatialPosition& pos,
+                                    const double porosity,
+                                    const double fluid_density,
+                                    const double specific_heat_capacity_fluid)
+    {
+        auto const specific_heat_capacity_solid =
+            _material_properties.specific_heat_capacity_solid(t, pos)[0];
 
-            // Use the fluid density model to compute the density
-            auto const density = _process_data.fluid_properties->getValue(
-                MaterialLib::Fluid::FluidPropertyType::Density, vars);
+        auto const solid_density = _material_properties.density_solid(t, pos)[0];
 
-            // Use the viscosity model to compute the viscosity
-            auto const viscosity = _process_data.fluid_properties->getValue(
-                MaterialLib::Fluid::FluidPropertyType::Viscosity, vars);
-            GlobalDimMatrixType K_over_mu = intrinsic_permeability / viscosity;
-
-            GlobalDimVectorType const velocity =
-                _process_data.has_gravity
-                    ? GlobalDimVectorType(-K_over_mu *
-                                          (dNdx * p_nodal_values - density * b))
-                    : GlobalDimVectorType(-K_over_mu * dNdx * p_nodal_values);
-
-
-            double const velocity_magnitude = velocity.norm();
-            GlobalDimMatrixType const thermal_dispersivity =
-                fluid_reference_density * specific_heat_capacity_fluid *
-                (thermal_dispersivity_transversal * velocity_magnitude *
-                     I +
-                 (thermal_dispersivity_longitudinal -
-                  thermal_dispersivity_transversal) /
-                     velocity_magnitude * velocity * velocity.transpose());
-
-            GlobalDimMatrixType const hydrodynamic_thermodispersion =
-                thermal_conductivity * I + thermal_dispersivity;
-
-            double const heat_capacity =
-                density_solid * specific_heat_capacity_solid * (1 - porosity) +
-                fluid_reference_density * specific_heat_capacity_fluid * porosity;
-
-            // matrix assembly
-            Ktt.noalias() +=
-                (dNdx.transpose() * hydrodynamic_thermodispersion * dNdx +
-                 N.transpose() * velocity.transpose() * dNdx *
-                     fluid_reference_density * specific_heat_capacity_fluid) *
-                w;
-            Kpp.noalias() += w * dNdx.transpose() * K_over_mu * dNdx;
-            Mtt.noalias() += w * N.transpose() * heat_capacity * N;
-            Mpp.noalias() += w * N.transpose() * specific_storage * N;
-            if (_process_data.has_gravity)
-                Bp += w * density * dNdx.transpose() * K_over_mu * b;
-            /* with Oberbeck-Boussing assumption density difference only exists
-             * in buoyancy effects */
-        }
+        return solid_density * specific_heat_capacity_solid * (1 - porosity) +
+               fluid_density * specific_heat_capacity_fluid * porosity;
     }
 
-    Eigen::Map<const Eigen::RowVectorXd> getShapeMatrix(
-        const unsigned integration_point) const override
+    GlobalDimMatrixType getThermalConductivityDispersivity(
+        const double t, const SpatialPosition& pos, const double porosity,
+        const double fluid_density, const double specific_heat_capacity_fluid,
+        const GlobalDimVectorType& velocity, const GlobalDimMatrixType& I)
     {
-        auto const& N = _ip_data[integration_point].N;
-
-        // assumes N is stored contiguously in memory
-        return Eigen::Map<const Eigen::RowVectorXd>(N.data(), N.size());
+        auto const thermal_conductivity_solid =
+            _material_properties.thermal_conductivity_solid(t, pos)[0];
+        auto const thermal_conductivity_fluid =
+            _material_properties.thermal_conductivity_fluid(t, pos)[0];
+        double const thermal_conductivity =
+            thermal_conductivity_solid * (1 - porosity) +
+            thermal_conductivity_fluid * porosity;
+
+        if (!_material_properties.has_fluid_thermal_dispersivity)
+            return thermal_conductivity * I;
+
+        double const thermal_dispersivity_longitudinal =
+            _material_properties.thermal_dispersivity_longitudinal(t, pos)[0];
+        auto const thermal_dispersivity_transversal =
+            _material_properties.thermal_dispersivity_transversal(t, pos)[0];
+
+        double const velocity_magnitude = velocity.norm();
+        GlobalDimMatrixType const thermal_dispersivity =
+            fluid_density * specific_heat_capacity_fluid *
+            (thermal_dispersivity_transversal * velocity_magnitude * I +
+             (thermal_dispersivity_longitudinal -
+              thermal_dispersivity_transversal) /
+                 velocity_magnitude * velocity * velocity.transpose());
+
+        return thermal_conductivity * I + thermal_dispersivity;
     }
 
-    std::vector<double> const& getIntPtDarcyVelocity(
-        const double t,
-        GlobalVector const& current_solution,
-        NumLib::LocalToGlobalIndexMap const& dof_table,
-        std::vector<double>& cache) const override
+    std::vector<double> const& getIntPtDarcyVelocityLocal(
+        const double t, std::vector<double> const& local_p,
+        std::vector<double> const& local_T, std::vector<double>& cache) const
     {
         auto const n_integration_points =
             _integration_method.getNumberOfPoints();
 
-        auto const indices = NumLib::getIndices(_element.getID(), dof_table);
-        assert(!indices.empty());
-        auto const local_x = current_solution.get(indices);
-
         cache.clear();
         auto cache_mat = MathLib::createZeroedMatrix<
             Eigen::Matrix<double, GlobalDim, Eigen::Dynamic, Eigen::RowMajor>>(
@@ -299,7 +163,7 @@ public:
         MaterialLib::Fluid::FluidProperty::ArrayType vars;
 
         auto const p_nodal_values = Eigen::Map<const NodalVectorType>(
-            &local_x[ShapeFunction::NPOINTS], ShapeFunction::NPOINTS);
+            &local_p[0], ShapeFunction::NPOINTS);
 
         for (unsigned ip = 0; ip < n_integration_points; ++ip)
         {
@@ -311,27 +175,29 @@ public:
 
             double T_int_pt = 0.0;
             double p_int_pt = 0.0;
-            NumLib::shapeFunctionInterpolate(local_x, N, T_int_pt, p_int_pt);
+            NumLib::shapeFunctionInterpolate(local_p, N, p_int_pt);
+            NumLib::shapeFunctionInterpolate(local_T, N, T_int_pt);
             vars[static_cast<int>(
                 MaterialLib::Fluid::PropertyVariableType::T)] = T_int_pt;
             vars[static_cast<int>(
                 MaterialLib::Fluid::PropertyVariableType::p)] = p_int_pt;
 
-            auto const K =
-                _process_data.porous_media_properties.getIntrinsicPermeability(
-                    t, pos).getValue(t, pos, 0.0, T_int_pt);
+            auto const K = _material_properties.porous_media_properties
+                               .getIntrinsicPermeability(t, pos)
+                               .getValue(t, pos, 0.0, T_int_pt);
 
-            auto const mu = _process_data.fluid_properties->getValue(
+            auto const mu = _material_properties.fluid_properties->getValue(
                 MaterialLib::Fluid::FluidPropertyType::Viscosity, vars);
             GlobalDimMatrixType const K_over_mu = K / mu;
 
             cache_mat.col(ip).noalias() = -K_over_mu * dNdx * p_nodal_values;
 
-            if (_process_data.has_gravity)
+            if (_material_properties.has_gravity)
             {
-                auto const rho_w = _process_data.fluid_properties->getValue(
-                    MaterialLib::Fluid::FluidPropertyType::Density, vars);
-                auto const b = _process_data.specific_body_force;
+                auto const rho_w =
+                    _material_properties.fluid_properties->getValue(
+                        MaterialLib::Fluid::FluidPropertyType::Density, vars);
+                auto const b = _material_properties.specific_body_force;
                 // here it is assumed that the vector b is directed 'downwards'
                 cache_mat.col(ip).noalias() += K_over_mu * rho_w * b;
             }
@@ -339,17 +205,6 @@ public:
 
         return cache;
     }
-
-private:
-    MeshLib::Element const& _element;
-    HTMaterialProperties const& _process_data;
-
-    IntegrationMethod const _integration_method;
-    std::vector<
-        IntegrationPointData<NodalRowVectorType, GlobalDimNodalMatrixType>,
-        Eigen::aligned_allocator<
-            IntegrationPointData<NodalRowVectorType, GlobalDimNodalMatrixType>>>
-        _ip_data;
 };
 
 }  // namespace HT
diff --git a/ProcessLib/HT/HTLocalAssemblerInterface.h b/ProcessLib/HT/HTLocalAssemblerInterface.h
new file mode 100644
index 0000000000000000000000000000000000000000..ecb4d10faf5830da0248c01438cfd15d95baca79
--- /dev/null
+++ b/ProcessLib/HT/HTLocalAssemblerInterface.h
@@ -0,0 +1,74 @@
+/**
+ * \copyright
+ * Copyright (c) 2012-2017, OpenGeoSys Community (http://www.opengeosys.org)
+ *            Distributed under a Modified BSD License.
+ *              See accompanying file LICENSE.txt or
+ *              http://www.opengeosys.org/project/license
+ *
+ *  \file   HTLocalAssemblerInterface.h
+ *  Created on October 11, 2017, 1:35 PM
+ */
+
+#pragma once
+
+#include "NumLib/Function/Interpolation.h"
+#include "ProcessLib/LocalAssemblerInterface.h"
+#include "MathLib/LinAlg/Eigen/EigenMapTools.h"
+
+namespace ProcessLib
+{
+struct CoupledSolutionsForStaggeredScheme;
+
+namespace HT
+{
+template <typename NodalRowVectorType, typename GlobalDimNodalMatrixType>
+struct IntegrationPointData final
+{
+    IntegrationPointData(NodalRowVectorType N_,
+                         GlobalDimNodalMatrixType dNdx_,
+                         double const& integration_weight_)
+        : N(std::move(N_)),
+          dNdx(std::move(dNdx_)),
+          integration_weight(integration_weight_)
+    {
+    }
+
+    NodalRowVectorType const N;
+    GlobalDimNodalMatrixType const dNdx;
+    double const integration_weight;
+
+    EIGEN_MAKE_ALIGNED_OPERATOR_NEW;
+};
+
+class HTLocalAssemblerInterface : public ProcessLib::LocalAssemblerInterface,
+                                  public NumLib::ExtrapolatableElement
+{
+public:
+    HTLocalAssemblerInterface() : _coupled_solutions(nullptr) {}
+    void setStaggeredCoupledSolutions(
+        std::size_t const /*mesh_item_id*/,
+        CoupledSolutionsForStaggeredScheme* const coupling_term)
+    {
+        _coupled_solutions = coupling_term;
+    }
+
+    virtual std::vector<double> const& getIntPtDarcyVelocity(
+        const double /*t*/,
+        GlobalVector const& /*current_solution*/,
+        NumLib::LocalToGlobalIndexMap const& /*dof_table*/,
+        std::vector<double>& /*cache*/) const = 0;
+
+protected:
+    // TODO: remove _coupled_solutions or move integration point data from local
+    // assembler class to a new class to make local assembler unique for each
+    // process.
+    /** Pointer to CoupledSolutionsForStaggeredScheme that is set in a member of
+     *  Process class, setCoupledTermForTheStaggeredSchemeToLocalAssemblers.
+     *  It is used for calculate the secondary variables like velocity for
+     *  coupled processes.
+     */
+    CoupledSolutionsForStaggeredScheme* _coupled_solutions;
+};
+
+}  // namespace HT
+}  // namespace ProcessLib
\ No newline at end of file
diff --git a/ProcessLib/HT/HTMaterialProperties.h b/ProcessLib/HT/HTMaterialProperties.h
index fb056efbd8d09f76547ad6e3a27431e8236b2d73..995b3c4d4512bb399ab687706bc2e2b464a528fa 100644
--- a/ProcessLib/HT/HTMaterialProperties.h
+++ b/ProcessLib/HT/HTMaterialProperties.h
@@ -28,65 +28,50 @@ struct HTMaterialProperties
         MaterialLib::PorousMedium::PorousMediaProperties&&
             porous_media_properties_,
         ProcessLib::Parameter<double> const& density_solid_,
-        ProcessLib::Parameter<double> const& fluid_reference_density_,
         std::unique_ptr<MaterialLib::Fluid::FluidProperties>&&
             fluid_properties_,
+        bool const has_fluid_thermal_dispersivity_,
         ProcessLib::Parameter<double> const& thermal_dispersivity_longitudinal_,
         ProcessLib::Parameter<double> const& thermal_dispersivity_transversal_,
         ProcessLib::Parameter<double> const& specific_heat_capacity_solid_,
         ProcessLib::Parameter<double> const& thermal_conductivity_solid_,
         ProcessLib::Parameter<double> const& thermal_conductivity_fluid_,
+        bool const has_fluid_thermal_expansion_,
+        Parameter<double> const& solid_thermal_expansion_,
+        Parameter<double> const& biot_constant_,
         Eigen::VectorXd specific_body_force_,
         bool const has_gravity_)
         : porous_media_properties(std::move(porous_media_properties_)),
           density_solid(density_solid_),
-          fluid_reference_density(fluid_reference_density_),
           fluid_properties(std::move(fluid_properties_)),
           specific_heat_capacity_solid(specific_heat_capacity_solid_),
+          has_fluid_thermal_dispersivity(has_fluid_thermal_dispersivity_),
           thermal_dispersivity_longitudinal(thermal_dispersivity_longitudinal_),
           thermal_dispersivity_transversal(thermal_dispersivity_transversal_),
           thermal_conductivity_solid(thermal_conductivity_solid_),
           thermal_conductivity_fluid(thermal_conductivity_fluid_),
+          has_fluid_thermal_expansion(has_fluid_thermal_expansion_),
+          solid_thermal_expansion(solid_thermal_expansion_),
+          biot_constant(biot_constant_),
           specific_body_force(std::move(specific_body_force_)),
           has_gravity(has_gravity_)
     {
     }
 
-    HTMaterialProperties(HTMaterialProperties&& other)
-        : porous_media_properties(std::move(other.porous_media_properties)),
-          density_solid(other.density_solid),
-          fluid_reference_density(other.fluid_reference_density),
-          fluid_properties(other.fluid_properties.release()),
-          specific_heat_capacity_solid(other.specific_heat_capacity_solid),
-          thermal_dispersivity_longitudinal(
-              other.thermal_dispersivity_longitudinal),
-          thermal_dispersivity_transversal(
-              other.thermal_dispersivity_transversal),
-          thermal_conductivity_solid(other.thermal_conductivity_solid),
-          thermal_conductivity_fluid(other.thermal_conductivity_fluid),
-          specific_body_force(other.specific_body_force),
-          has_gravity(other.has_gravity)
-    {
-    }
-
-    //! Copies are forbidden.
-    HTMaterialProperties(HTMaterialProperties const&) = delete;
-
-    //! Assignments are not needed.
-    void operator=(HTMaterialProperties const&) = delete;
-
-    //! Assignments are not needed.
-    void operator=(HTMaterialProperties&&) = delete;
-
     MaterialLib::PorousMedium::PorousMediaProperties porous_media_properties;
     Parameter<double> const& density_solid;
-    Parameter<double> const& fluid_reference_density;
     std::unique_ptr<MaterialLib::Fluid::FluidProperties> fluid_properties;
     Parameter<double> const& specific_heat_capacity_solid;
+    bool const has_fluid_thermal_dispersivity;
     Parameter<double> const& thermal_dispersivity_longitudinal;
     Parameter<double> const& thermal_dispersivity_transversal;
     Parameter<double> const& thermal_conductivity_solid;
     Parameter<double> const& thermal_conductivity_fluid;
+
+    bool const has_fluid_thermal_expansion;
+    Parameter<double> const& solid_thermal_expansion;
+    Parameter<double> const& biot_constant;
+
     Eigen::VectorXd const specific_body_force;
     bool const has_gravity;
 };
diff --git a/ProcessLib/HT/HTProcess.cpp b/ProcessLib/HT/HTProcess.cpp
index bcc58332edc133b7f52871e08b8cbcfce10a91df..2890f137a489b19baf4c9cd57a97c5514dd23606 100644
--- a/ProcessLib/HT/HTProcess.cpp
+++ b/ProcessLib/HT/HTProcess.cpp
@@ -13,6 +13,11 @@
 
 #include "ProcessLib/Utils/CreateLocalAssemblers.h"
 
+#include "HTMaterialProperties.h"
+
+#include "MonolithicHTFEM.h"
+#include "StaggeredHTFEM.h"
+
 namespace ProcessLib
 {
 namespace HT
@@ -22,13 +27,16 @@ HTProcess::HTProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     std::unique_ptr<HTMaterialProperties>&& material_properties,
     SecondaryVariableCollection&& secondary_variables,
-    NumLib::NamedFunctionCaller&& named_function_caller)
+    NumLib::NamedFunctionCaller&& named_function_caller,
+    bool const use_monolithic_scheme)
     : Process(mesh, std::move(jacobian_assembler), parameters,
               integration_order, std::move(process_variables),
-              std::move(secondary_variables), std::move(named_function_caller)),
+              std::move(secondary_variables), std::move(named_function_caller),
+              use_monolithic_scheme),
       _material_properties(std::move(material_properties))
 {
 }
@@ -39,10 +47,23 @@ void HTProcess::initializeConcreteProcess(
     unsigned const integration_order)
 {
     ProcessLib::ProcessVariable const& pv = getProcessVariables()[0];
-    ProcessLib::createLocalAssemblers<LocalAssemblerData>(
-        mesh.getDimension(), mesh.getElements(), dof_table,
-        pv.getShapeFunctionOrder(), _local_assemblers,
-        mesh.isAxiallySymmetric(), integration_order, *_material_properties);
+
+    if (_use_monolithic_scheme)
+    {
+        ProcessLib::createLocalAssemblers<MonolithicHTFEM>(
+            mesh.getDimension(), mesh.getElements(), dof_table,
+            pv.getShapeFunctionOrder(), _local_assemblers,
+            mesh.isAxiallySymmetric(), integration_order,
+            *_material_properties);
+    }
+    else
+    {
+        ProcessLib::createLocalAssemblers<StaggeredHTFEM>(
+            mesh.getDimension(), mesh.getElements(), dof_table,
+            pv.getShapeFunctionOrder(), _local_assemblers,
+            mesh.isAxiallySymmetric(), integration_order,
+            *_material_properties);
+    }
 
     _secondary_variables.addSecondaryVariable(
         "darcy_velocity",
@@ -57,7 +78,26 @@ void HTProcess::assembleConcreteProcess(const double t,
                                         GlobalMatrix& K,
                                         GlobalVector& b)
 {
-    DBUG("Assemble HTProcess.");
+    if (_use_monolithic_scheme)
+    {
+        DBUG("Assemble HTProcess.");
+    }
+    else
+    {
+        if (_coupled_solutions->process_id == 0)
+        {
+            DBUG(
+                "Assemble the equations of heat transport process within "
+                "HTProcess.");
+        }
+        else
+        {
+            DBUG(
+                "Assemble the equations of single phase fully saturated "
+                "fluid flow process within HTProcess.");
+        }
+        setCoupledSolutionsOfPreviousTimeStep();
+    }
 
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
@@ -72,6 +112,11 @@ void HTProcess::assembleWithJacobianConcreteProcess(
 {
     DBUG("AssembleWithJacobian HTProcess.");
 
+    if (!_use_monolithic_scheme)
+    {
+        setCoupledSolutionsOfPreviousTimeStep();
+    }
+
     // Call global assembler for each local assembly item.
     GlobalExecutor::executeMemberDereferenced(
         _global_assembler, &VectorMatrixAssembler::assembleWithJacobian,
@@ -79,6 +124,41 @@ void HTProcess::assembleWithJacobianConcreteProcess(
         dx_dx, M, K, b, Jac, _coupled_solutions);
 }
 
+void HTProcess::preTimestepConcreteProcess(GlobalVector const& x,
+                                           const double /*t*/,
+                                           const double /*delta_t*/,
+                                           const int process_id)
+{
+    assert(process_id < 2);
+
+    if (_use_monolithic_scheme)
+    {
+        return;
+    }
+
+    if (!_xs_previous_timestep[process_id])
+    {
+        _xs_previous_timestep[process_id] =
+            MathLib::MatrixVectorTraits<GlobalVector>::newInstance(x);
+    }
+    else
+    {
+        auto& x0 = *_xs_previous_timestep[process_id];
+        MathLib::LinAlg::copy(x, x0);
+    }
+
+    auto& x0 = *_xs_previous_timestep[process_id];
+    MathLib::LinAlg::setLocalAccessibleVector(x0);
+}
+
+void HTProcess::setCoupledTermForTheStaggeredSchemeToLocalAssemblers()
+{
+    DBUG("Set the coupled term for the staggered scheme to local assembers.");
+
+    GlobalExecutor::executeMemberOnDereferenced(
+        &HTLocalAssemblerInterface::setStaggeredCoupledSolutions,
+        _local_assemblers, _coupled_solutions);
+}
+
 }  // namespace HT
 }  // namespace ProcessLib
-
diff --git a/ProcessLib/HT/HTProcess.h b/ProcessLib/HT/HTProcess.h
index b04ab174b4a6351e854e5dd9dea1c057b95eb6b1..0c1c94596547ddb4e85633a9bb71514d754ab383 100644
--- a/ProcessLib/HT/HTProcess.h
+++ b/ProcessLib/HT/HTProcess.h
@@ -9,8 +9,8 @@
 
 #pragma once
 
-#include "HTFEM.h"
-#include "HTMaterialProperties.h"
+#include <array>
+
 #include "NumLib/Extrapolation/LocalLinearLeastSquaresExtrapolator.h"
 #include "ProcessLib/Process.h"
 
@@ -18,6 +18,9 @@ namespace ProcessLib
 {
 namespace HT
 {
+class HTLocalAssemblerInterface;
+struct HTMaterialProperties;
+
 /**
  * # HT process
  *
@@ -40,16 +43,18 @@ namespace HT
 class HTProcess final : public Process
 {
 public:
-    HTProcess(MeshLib::Mesh& mesh,
-              std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&&
-                  jacobian_assembler,
-              std::vector<std::unique_ptr<ParameterBase>> const& parameters,
-              unsigned const integration_order,
-              std::vector<std::reference_wrapper<ProcessVariable>>&&
-                  process_variables,
-              std::unique_ptr<HTMaterialProperties>&& material_properties,
-              SecondaryVariableCollection&& secondary_variables,
-              NumLib::NamedFunctionCaller&& named_function_caller);
+    HTProcess(
+        MeshLib::Mesh& mesh,
+        std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&&
+            jacobian_assembler,
+        std::vector<std::unique_ptr<ParameterBase>> const& parameters,
+        unsigned const integration_order,
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+            process_variables,
+        std::unique_ptr<HTMaterialProperties>&& material_properties,
+        SecondaryVariableCollection&& secondary_variables,
+        NumLib::NamedFunctionCaller&& named_function_caller,
+        bool const use_monolithic_scheme);
 
     //! \name ODESystem interface
     //! @{
@@ -57,6 +62,15 @@ public:
     bool isLinear() const override { return false; }
     //! @}
 
+    // Get the solution of the previous time step.
+    GlobalVector* getPreviousTimeStepSolution(
+        const int process_id) const override
+    {
+        return _xs_previous_timestep[process_id].get();
+    }
+
+    void setCoupledTermForTheStaggeredSchemeToLocalAssemblers() override;
+
 private:
     void initializeConcreteProcess(
         NumLib::LocalToGlobalIndexMap const& dof_table,
@@ -72,9 +86,16 @@ private:
         const double dxdot_dx, const double dx_dx, GlobalMatrix& M,
         GlobalMatrix& K, GlobalVector& b, GlobalMatrix& Jac) override;
 
+    void preTimestepConcreteProcess(GlobalVector const& x, double const t,
+                                    double const dt,
+                                    const int process_id) override;
+
     const std::unique_ptr<HTMaterialProperties> _material_properties;
 
     std::vector<std::unique_ptr<HTLocalAssemblerInterface>> _local_assemblers;
+
+    /// Solutions of the previous time step
+    std::array<std::unique_ptr<GlobalVector>, 2> _xs_previous_timestep;
 };
 
 }  // namespace HT
diff --git a/ProcessLib/HT/MonolithicHTFEM.h b/ProcessLib/HT/MonolithicHTFEM.h
new file mode 100644
index 0000000000000000000000000000000000000000..966d03ae82de31745d31728b8427d252e19c31e5
--- /dev/null
+++ b/ProcessLib/HT/MonolithicHTFEM.h
@@ -0,0 +1,209 @@
+/**
+ * \copyright
+ * Copyright (c) 2012-2017, OpenGeoSys Community (http://www.opengeosys.org)
+ *            Distributed under a Modified BSD License.
+ *              See accompanying file LICENSE.txt or
+ *              http://www.opengeosys.org/project/license
+ *
+ *  \file   MonolithicHTFEM.h
+ *  Created on October 11, 2017, 2:33 PM
+ */
+
+#pragma once
+
+#include <Eigen/Dense>
+#include <vector>
+
+#include "HTMaterialProperties.h"
+#include "NumLib/DOF/DOFTableUtil.h"
+#include "NumLib/Extrapolation/ExtrapolatableElement.h"
+#include "NumLib/Fem/FiniteElement/TemplateIsoparametric.h"
+#include "NumLib/Fem/ShapeMatrixPolicy.h"
+#include "ProcessLib/Parameter/Parameter.h"
+#include "ProcessLib/Utils/InitShapeMatrices.h"
+
+#include "HTFEM.h"
+
+namespace ProcessLib
+{
+namespace HT
+{
+const unsigned NUM_NODAL_DOF = 2;
+
+template <typename ShapeFunction, typename IntegrationMethod,
+          unsigned GlobalDim>
+class MonolithicHTFEM
+    : public HTFEM<ShapeFunction, IntegrationMethod, GlobalDim>
+{
+    using ShapeMatricesType = ShapeMatrixPolicyType<ShapeFunction, GlobalDim>;
+    using ShapeMatrices = typename ShapeMatricesType::ShapeMatrices;
+
+    using LocalMatrixType = typename ShapeMatricesType::template MatrixType<
+        NUM_NODAL_DOF * ShapeFunction::NPOINTS,
+        NUM_NODAL_DOF * ShapeFunction::NPOINTS>;
+    using LocalVectorType =
+        typename ShapeMatricesType::template VectorType<NUM_NODAL_DOF *
+                                                        ShapeFunction::NPOINTS>;
+
+    using NodalVectorType = typename ShapeMatricesType::NodalVectorType;
+    using NodalRowVectorType = typename ShapeMatricesType::NodalRowVectorType;
+
+    using GlobalDimVectorType = typename ShapeMatricesType::GlobalDimVectorType;
+    using GlobalDimMatrixType = typename ShapeMatricesType::GlobalDimMatrixType;
+
+public:
+    MonolithicHTFEM(MeshLib::Element const& element,
+                    std::size_t const local_matrix_size,
+                    bool is_axially_symmetric,
+                    unsigned const integration_order,
+                    HTMaterialProperties const& material_properties)
+        : HTFEM<ShapeFunction, IntegrationMethod, GlobalDim>(
+              element, local_matrix_size, is_axially_symmetric,
+              integration_order, material_properties, NUM_NODAL_DOF)
+    {
+    }
+
+    void assemble(double const t, std::vector<double> const& local_x,
+                  std::vector<double>& local_M_data,
+                  std::vector<double>& local_K_data,
+                  std::vector<double>& local_b_data) override
+    {
+        auto const local_matrix_size = local_x.size();
+        // This assertion is valid only if all nodal d.o.f. use the same shape
+        // matrices.
+        assert(local_matrix_size == ShapeFunction::NPOINTS * NUM_NODAL_DOF);
+
+        auto local_M = MathLib::createZeroedMatrix<LocalMatrixType>(
+            local_M_data, local_matrix_size, local_matrix_size);
+        auto local_K = MathLib::createZeroedMatrix<LocalMatrixType>(
+            local_K_data, local_matrix_size, local_matrix_size);
+        auto local_b = MathLib::createZeroedVector<LocalVectorType>(
+            local_b_data, local_matrix_size);
+
+        auto const num_nodes = ShapeFunction::NPOINTS;
+
+        auto Ktt = local_K.template block<num_nodes, num_nodes>(0, 0);
+        auto Mtt = local_M.template block<num_nodes, num_nodes>(0, 0);
+        auto Kpp =
+            local_K.template block<num_nodes, num_nodes>(num_nodes, num_nodes);
+        auto Mpp =
+            local_M.template block<num_nodes, num_nodes>(num_nodes, num_nodes);
+        auto Bp = local_b.template block<num_nodes, 1>(num_nodes, 0);
+
+        SpatialPosition pos;
+        pos.setElementID(this->_element.getID());
+
+        auto p_nodal_values =
+            Eigen::Map<const NodalVectorType>(&local_x[num_nodes], num_nodes);
+
+        auto const& process_data = this->_material_properties;
+
+        auto const& b = process_data.specific_body_force;
+
+        GlobalDimMatrixType const& I(
+            GlobalDimMatrixType::Identity(GlobalDim, GlobalDim));
+
+        MaterialLib::Fluid::FluidProperty::ArrayType vars;
+
+        unsigned const n_integration_points =
+            this->_integration_method.getNumberOfPoints();
+
+        for (unsigned ip(0); ip < n_integration_points; ip++)
+        {
+            pos.setIntegrationPoint(ip);
+
+            // \todo the argument to getValue() has to be changed for non
+            // constant storage model
+            auto const specific_storage =
+                process_data.porous_media_properties.getSpecificStorage(t, pos)
+                    .getValue(0.0);
+
+            auto const& ip_data = this->_ip_data[ip];
+            auto const& N = ip_data.N;
+            auto const& dNdx = ip_data.dNdx;
+            auto const& w = ip_data.integration_weight;
+
+            double T_int_pt = 0.0;
+            double p_int_pt = 0.0;
+            // Order matters: First T, then P!
+            NumLib::shapeFunctionInterpolate(local_x, N, T_int_pt, p_int_pt);
+
+            // \todo the first argument has to be changed for non constant
+            // porosity model
+            auto const porosity =
+                process_data.porous_media_properties.getPorosity(t, pos)
+                    .getValue(t, pos, 0.0, T_int_pt);
+            auto const intrinsic_permeability =
+                process_data.porous_media_properties.getIntrinsicPermeability(
+                    t, pos).getValue(t, pos, 0.0, T_int_pt);
+
+            vars[static_cast<int>(
+                MaterialLib::Fluid::PropertyVariableType::T)] = T_int_pt;
+            vars[static_cast<int>(
+                MaterialLib::Fluid::PropertyVariableType::p)] = p_int_pt;
+            auto const specific_heat_capacity_fluid =
+                process_data.fluid_properties->getValue(
+                    MaterialLib::Fluid::FluidPropertyType::HeatCapacity, vars);
+
+            // Use the fluid density model to compute the density
+            auto const fluid_density = process_data.fluid_properties->getValue(
+                MaterialLib::Fluid::FluidPropertyType::Density, vars);
+
+            // Use the viscosity model to compute the viscosity
+            auto const viscosity = process_data.fluid_properties->getValue(
+                MaterialLib::Fluid::FluidPropertyType::Viscosity, vars);
+            GlobalDimMatrixType K_over_mu = intrinsic_permeability / viscosity;
+
+            GlobalDimVectorType const velocity =
+                process_data.has_gravity
+                    ? GlobalDimVectorType(-K_over_mu * (dNdx * p_nodal_values -
+                                                        fluid_density * b))
+                    : GlobalDimVectorType(-K_over_mu * dNdx * p_nodal_values);
+
+            // matrix assembly
+            GlobalDimMatrixType const thermal_conductivity_dispersivity =
+                this->getThermalConductivityDispersivity(
+                    t, pos, porosity, fluid_density,
+                    specific_heat_capacity_fluid, velocity, I);
+            Ktt.noalias() +=
+                (dNdx.transpose() * thermal_conductivity_dispersivity * dNdx +
+                 N.transpose() * velocity.transpose() * dNdx * fluid_density *
+                     specific_heat_capacity_fluid) *
+                w;
+            Kpp.noalias() += w * dNdx.transpose() * K_over_mu * dNdx;
+            Mtt.noalias() +=
+                w *
+                this->getHeatEnergyCoefficient(t, pos, porosity, fluid_density,
+                                               specific_heat_capacity_fluid) *
+                N.transpose() * N;
+            Mpp.noalias() += w * N.transpose() * specific_storage * N;
+            if (process_data.has_gravity)
+                Bp += w * fluid_density * dNdx.transpose() * K_over_mu * b;
+            /* with Oberbeck-Boussing assumption density difference only exists
+             * in buoyancy effects */
+        }
+    }
+
+    std::vector<double> const& getIntPtDarcyVelocity(
+        const double t,
+        GlobalVector const& current_solution,
+        NumLib::LocalToGlobalIndexMap const& dof_table,
+        std::vector<double>& cache) const override
+    {
+        auto const indices =
+            NumLib::getIndices(this->_element.getID(), dof_table);
+        assert(!indices.empty());
+        auto local_x = current_solution.get(indices);
+
+        std::vector<double> local_T(
+            std::make_move_iterator(local_x.begin() + local_x.size() / 2),
+            std::make_move_iterator(local_x.end()));
+        // only p is kept in local_x
+        local_x.erase(local_x.begin() + local_x.size() / 2, local_x.end());
+
+        return this->getIntPtDarcyVelocityLocal(t, local_x, local_T, cache);
+    }
+};
+
+}  // namespace HT
+}  // namespace ProcessLib
diff --git a/ProcessLib/HT/StaggeredHTFEM-impl.h b/ProcessLib/HT/StaggeredHTFEM-impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..58313561689f331ad57d1f818fdcfe14fa8da324
--- /dev/null
+++ b/ProcessLib/HT/StaggeredHTFEM-impl.h
@@ -0,0 +1,286 @@
+/**
+ * \copyright
+ * Copyright (c) 2012-2017, OpenGeoSys Community (http://www.opengeosys.org)
+ *            Distributed under a Modified BSD License.
+ *              See accompanying file LICENSE.txt or
+ *              http://www.opengeosys.org/project/license
+ *
+ *  \file   StaggeredHTFEM-impl.h
+ *  Created on October 13, 2017, 3:52 PM
+ */
+
+#pragma once
+
+#include "StaggeredHTFEM.h"
+
+#include "ProcessLib/CoupledSolutionsForStaggeredScheme.h"
+
+namespace ProcessLib
+{
+namespace HT
+{
+template <typename ShapeFunction, typename IntegrationMethod,
+          unsigned GlobalDim>
+void StaggeredHTFEM<ShapeFunction, IntegrationMethod, GlobalDim>::
+    assembleWithCoupledTerm(double const t,
+                            std::vector<double>& local_M_data,
+                            std::vector<double>& local_K_data,
+                            std::vector<double>& local_b_data,
+                            LocalCoupledSolutions const& coupled_xs)
+{
+    if (coupled_xs.process_id == 0)
+    {
+        assembleHeatTransportEquation(t, local_M_data, local_K_data,
+                                      local_b_data, coupled_xs);
+        return;
+    }
+
+    assembleHydraulicEquation(t, local_M_data, local_K_data, local_b_data,
+                              coupled_xs);
+}
+
+template <typename ShapeFunction, typename IntegrationMethod,
+          unsigned GlobalDim>
+void StaggeredHTFEM<ShapeFunction, IntegrationMethod, GlobalDim>::
+    assembleHydraulicEquation(double const t, std::vector<double>& local_M_data,
+                              std::vector<double>& local_K_data,
+                              std::vector<double>& local_b_data,
+                              LocalCoupledSolutions const& coupled_xs)
+{
+    auto const& local_p = coupled_xs.local_coupled_xs[1];
+    auto const local_matrix_size = local_p.size();
+    // This assertion is valid only if all nodal d.o.f. use the same shape
+    // matrices.
+    assert(local_matrix_size == ShapeFunction::NPOINTS);
+
+    auto const& local_T1 = coupled_xs.local_coupled_xs[0];
+    auto const& local_T0 = coupled_xs.local_coupled_xs0[0];
+    const double dt = coupled_xs.dt;
+
+    auto local_M = MathLib::createZeroedMatrix<LocalMatrixType>(
+        local_M_data, local_matrix_size, local_matrix_size);
+    auto local_K = MathLib::createZeroedMatrix<LocalMatrixType>(
+        local_K_data, local_matrix_size, local_matrix_size);
+    auto local_b = MathLib::createZeroedVector<LocalVectorType>(
+        local_b_data, local_matrix_size);
+
+    SpatialPosition pos;
+    pos.setElementID(this->_element.getID());
+
+    auto const& material_properties = this->_material_properties;
+
+    auto const& b = material_properties.specific_body_force;
+
+    MaterialLib::Fluid::FluidProperty::ArrayType vars;
+
+    unsigned const n_integration_points =
+        this->_integration_method.getNumberOfPoints();
+
+    for (unsigned ip(0); ip < n_integration_points; ip++)
+    {
+        pos.setIntegrationPoint(ip);
+
+        auto const& ip_data = this->_ip_data[ip];
+        auto const& N = ip_data.N;
+        auto const& dNdx = ip_data.dNdx;
+        auto const& w = ip_data.integration_weight;
+
+        double p_at_xi = 0.;
+        NumLib::shapeFunctionInterpolate(local_p, N, p_at_xi);
+        double T1_at_xi = 0.;
+        NumLib::shapeFunctionInterpolate(local_T1, N, T1_at_xi);
+
+        auto const porosity =
+            material_properties.porous_media_properties.getPorosity(t, pos)
+                .getValue(t, pos, 0.0, T1_at_xi);
+
+        vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::T)] =
+            T1_at_xi;
+        vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::p)] =
+            p_at_xi;
+
+        // Use the fluid density model to compute the density
+        auto const fluid_density =
+            material_properties.fluid_properties->getValue(
+                MaterialLib::Fluid::FluidPropertyType::Density, vars);
+        const double dfluid_density_dp =
+            material_properties.fluid_properties->getdValue(
+                MaterialLib::Fluid::FluidPropertyType::Density, vars,
+                MaterialLib::Fluid::PropertyVariableType::p);
+
+        // Use the viscosity model to compute the viscosity
+        auto const viscosity = material_properties.fluid_properties->getValue(
+            MaterialLib::Fluid::FluidPropertyType::Viscosity, vars);
+
+        // \todo the argument to getValue() has to be changed for non
+        // constant storage model
+        auto const specific_storage =
+            material_properties.porous_media_properties
+                .getSpecificStorage(t, pos)
+                .getValue(0.0);
+
+        auto const intrinsic_permeability =
+            material_properties.porous_media_properties
+                .getIntrinsicPermeability(t, pos)
+                .getValue(t, pos, 0.0, T1_at_xi);
+        GlobalDimMatrixType K_over_mu = intrinsic_permeability / viscosity;
+
+        // matrix assembly
+        local_M.noalias() += w * (porosity * dfluid_density_dp / fluid_density +
+                                  specific_storage) *
+                             N.transpose() * N;
+
+        local_K.noalias() += w * dNdx.transpose() * K_over_mu * dNdx;
+
+        if (material_properties.has_gravity)
+        {
+            local_b.noalias() +=
+                w * fluid_density * dNdx.transpose() * K_over_mu * b;
+        }
+
+        if (!material_properties.has_fluid_thermal_expansion)
+            return;
+
+        // Add the thermal expansion term
+        {
+            auto const solid_thermal_expansion =
+                material_properties.solid_thermal_expansion(t, pos)[0];
+            const double dfluid_density_dT =
+                material_properties.fluid_properties->getdValue(
+                    MaterialLib::Fluid::FluidPropertyType::Density, vars,
+                    MaterialLib::Fluid::PropertyVariableType::T);
+            double T0_at_xi = 0.;
+            NumLib::shapeFunctionInterpolate(local_T0, N, T0_at_xi);
+            auto const biot_constant =
+                material_properties.biot_constant(t, pos)[0];
+            const double eff_thermal_expansion =
+                3.0 * (biot_constant - porosity) * solid_thermal_expansion -
+                porosity * dfluid_density_dT / fluid_density;
+            local_b.noalias() +=
+                eff_thermal_expansion * (T1_at_xi - T0_at_xi) * w * N / dt;
+        }
+    }
+}
+
+template <typename ShapeFunction, typename IntegrationMethod,
+          unsigned GlobalDim>
+void StaggeredHTFEM<ShapeFunction, IntegrationMethod, GlobalDim>::
+    assembleHeatTransportEquation(double const t,
+                                  std::vector<double>& local_M_data,
+                                  std::vector<double>& local_K_data,
+                                  std::vector<double>& /*local_b_data*/,
+                                  LocalCoupledSolutions const& coupled_xs)
+{
+    auto const& local_p = coupled_xs.local_coupled_xs[1];
+    auto const local_matrix_size = local_p.size();
+    // This assertion is valid only if all nodal d.o.f. use the same shape
+    // matrices.
+    assert(local_matrix_size == ShapeFunction::NPOINTS);
+
+    auto local_p_Eigen_type =
+        Eigen::Map<const NodalVectorType>(&local_p[0], local_matrix_size);
+
+    auto const& local_T1 = coupled_xs.local_coupled_xs[0];
+
+    auto local_M = MathLib::createZeroedMatrix<LocalMatrixType>(
+        local_M_data, local_matrix_size, local_matrix_size);
+    auto local_K = MathLib::createZeroedMatrix<LocalMatrixType>(
+        local_K_data, local_matrix_size, local_matrix_size);
+
+    SpatialPosition pos;
+    pos.setElementID(this->_element.getID());
+
+    auto const& material_properties = this->_material_properties;
+
+    auto const& b = material_properties.specific_body_force;
+
+    GlobalDimMatrixType const& I(
+        GlobalDimMatrixType::Identity(GlobalDim, GlobalDim));
+
+    MaterialLib::Fluid::FluidProperty::ArrayType vars;
+
+    unsigned const n_integration_points =
+        this->_integration_method.getNumberOfPoints();
+
+    for (unsigned ip(0); ip < n_integration_points; ip++)
+    {
+        pos.setIntegrationPoint(ip);
+
+        auto const& ip_data = this->_ip_data[ip];
+        auto const& N = ip_data.N;
+        auto const& dNdx = ip_data.dNdx;
+        auto const& w = ip_data.integration_weight;
+
+        double p_at_xi = 0.;
+        NumLib::shapeFunctionInterpolate(local_p, N, p_at_xi);
+        double T1_at_xi = 0.;
+        NumLib::shapeFunctionInterpolate(local_T1, N, T1_at_xi);
+
+        auto const porosity =
+            material_properties.porous_media_properties.getPorosity(t, pos)
+                .getValue(t, pos, 0.0, T1_at_xi);
+
+        vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::T)] =
+            T1_at_xi;
+        vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::p)] =
+            p_at_xi;
+        auto const fluid_density =
+            material_properties.fluid_properties->getValue(
+                MaterialLib::Fluid::FluidPropertyType::Density, vars);
+        auto const specific_heat_capacity_fluid =
+            material_properties.fluid_properties->getValue(
+                MaterialLib::Fluid::FluidPropertyType::HeatCapacity, vars);
+
+        // Assemble mass matrix
+        local_M.noalias() +=
+            w *
+            this->getHeatEnergyCoefficient(t, pos, porosity, fluid_density,
+                                           specific_heat_capacity_fluid) *
+            N.transpose() * N;
+
+        // Assemble Laplace matrix
+        auto const viscosity = material_properties.fluid_properties->getValue(
+            MaterialLib::Fluid::FluidPropertyType::Viscosity, vars);
+
+        auto const intrinsic_permeability =
+            material_properties.porous_media_properties
+                .getIntrinsicPermeability(t, pos)
+                .getValue(t, pos, 0.0, T1_at_xi);
+
+        GlobalDimMatrixType K_over_mu = intrinsic_permeability / viscosity;
+        GlobalDimVectorType const velocity =
+            material_properties.has_gravity
+                ? GlobalDimVectorType(-K_over_mu * (dNdx * local_p_Eigen_type -
+                                                    fluid_density * b))
+                : GlobalDimVectorType(-K_over_mu * dNdx * local_p_Eigen_type);
+
+        GlobalDimMatrixType const thermal_conductivity_dispersivity =
+            this->getThermalConductivityDispersivity(
+                t, pos, porosity, fluid_density, specific_heat_capacity_fluid,
+                velocity, I);
+
+        local_K.noalias() +=
+            w * (dNdx.transpose() * thermal_conductivity_dispersivity * dNdx +
+                 N.transpose() * velocity.transpose() * dNdx * fluid_density *
+                     specific_heat_capacity_fluid);
+    }
+}
+
+template <typename ShapeFunction, typename IntegrationMethod,
+          unsigned GlobalDim>
+std::vector<double> const&
+StaggeredHTFEM<ShapeFunction, IntegrationMethod, GlobalDim>::
+    getIntPtDarcyVelocity(const double t,
+                          GlobalVector const& /*current_solution*/,
+                          NumLib::LocalToGlobalIndexMap const& dof_table,
+                          std::vector<double>& cache) const
+{
+    auto const indices = NumLib::getIndices(this->_element.getID(), dof_table);
+    assert(!indices.empty());
+    auto const local_xs =
+        getCurrentLocalSolutions(*(this->_coupled_solutions), indices);
+
+    return this->getIntPtDarcyVelocityLocal(t, local_xs[0], local_xs[1], cache);
+}
+}  // namespace HT
+}  // namespace ProcessLib
diff --git a/ProcessLib/HT/StaggeredHTFEM.h b/ProcessLib/HT/StaggeredHTFEM.h
new file mode 100644
index 0000000000000000000000000000000000000000..a8b37f16bde70ea8051d991f0208124e9f5f0a0c
--- /dev/null
+++ b/ProcessLib/HT/StaggeredHTFEM.h
@@ -0,0 +1,92 @@
+/**
+ * \copyright
+ * Copyright (c) 2012-2017, OpenGeoSys Community (http://www.opengeosys.org)
+ *            Distributed under a Modified BSD License.
+ *              See accompanying file LICENSE.txt or
+ *              http://www.opengeosys.org/project/license
+ *
+ *  \file   StaggeredHTFEM.h
+ *  Created on October 11, 2017, 1:42 PM
+ */
+
+#pragma once
+
+#include <Eigen/Dense>
+#include <vector>
+
+#include "NumLib/DOF/DOFTableUtil.h"
+#include "NumLib/Extrapolation/ExtrapolatableElement.h"
+#include "NumLib/Fem/FiniteElement/TemplateIsoparametric.h"
+#include "NumLib/Fem/ShapeMatrixPolicy.h"
+#include "ProcessLib/Parameter/Parameter.h"
+#include "ProcessLib/Utils/InitShapeMatrices.h"
+
+#include "HTFEM.h"
+
+namespace ProcessLib
+{
+struct LocalCoupledSolutions;
+
+namespace HT
+{
+template <typename ShapeFunction, typename IntegrationMethod,
+          unsigned GlobalDim>
+class StaggeredHTFEM : public HTFEM<ShapeFunction, IntegrationMethod, GlobalDim>
+{
+    using ShapeMatricesType = ShapeMatrixPolicyType<ShapeFunction, GlobalDim>;
+    using ShapeMatrices = typename ShapeMatricesType::ShapeMatrices;
+
+    using LocalMatrixType =
+        typename ShapeMatricesType::template MatrixType<ShapeFunction::NPOINTS,
+                                                        ShapeFunction::NPOINTS>;
+    using LocalVectorType =
+        typename ShapeMatricesType::template VectorType<ShapeFunction::NPOINTS>;
+
+    using NodalVectorType = typename ShapeMatricesType::NodalVectorType;
+    using NodalRowVectorType = typename ShapeMatricesType::NodalRowVectorType;
+
+    using GlobalDimVectorType = typename ShapeMatricesType::GlobalDimVectorType;
+    using GlobalDimNodalMatrixType =
+        typename ShapeMatricesType::GlobalDimNodalMatrixType;
+    using GlobalDimMatrixType = typename ShapeMatricesType::GlobalDimMatrixType;
+
+public:
+    StaggeredHTFEM(MeshLib::Element const& element,
+                   std::size_t const local_matrix_size,
+                   bool is_axially_symmetric,
+                   unsigned const integration_order,
+                   HTMaterialProperties const& material_properties)
+        : HTFEM<ShapeFunction, IntegrationMethod, GlobalDim>(
+              element, local_matrix_size, is_axially_symmetric,
+              integration_order, material_properties, 1)
+    {
+    }
+
+    void assembleWithCoupledTerm(
+        double const t, std::vector<double>& local_M_data,
+        std::vector<double>& local_K_data, std::vector<double>& local_b_data,
+        LocalCoupledSolutions const& coupled_xs) override;
+
+    std::vector<double> const& getIntPtDarcyVelocity(
+        const double t,
+        GlobalVector const& current_solution,
+        NumLib::LocalToGlobalIndexMap const& dof_table,
+        std::vector<double>& cache) const override;
+
+private:
+    void assembleHydraulicEquation(double const t,
+                                   std::vector<double>& local_M_data,
+                                   std::vector<double>& local_K_data,
+                                   std::vector<double>& local_b_data,
+                                   LocalCoupledSolutions const& coupled_xs);
+
+    void assembleHeatTransportEquation(
+        double const t, std::vector<double>& local_M_data,
+        std::vector<double>& local_K_data, std::vector<double>& local_b_data,
+        LocalCoupledSolutions const& coupled_xs);
+};
+
+}  // namespace HT
+}  // namespace ProcessLib
+
+#include "StaggeredHTFEM-impl.h"
diff --git a/ProcessLib/HT/Tests.cmake b/ProcessLib/HT/Tests.cmake
index f9bad242cc0b46d5aa968b622de9a0588b72ac7e..f25e2368e2e96123b2ca8297885c66ac232fa251 100644
--- a/ProcessLib/HT/Tests.cmake
+++ b/ProcessLib/HT/Tests.cmake
@@ -11,7 +11,21 @@ AddTest(
     VIS ConstViscosityThermalConvection_pcs_0_ts_149_t_50000000000.000000.vtu
 )
 
-# MPI tests
+AddTest(
+    NAME LARGE_2D_ThermalConvection_constviscosityStaggeredScheme
+    PATH Parabolic/HT/StaggeredCoupling/ConstViscosity
+    EXECUTABLE ogs
+    EXECUTABLE_ARGS square_5500x5500_staggered_scheme.prj
+    WRAPPER time
+    TESTER vtkdiff
+    REQUIREMENTS NOT (OGS_USE_LIS OR OGS_USE_MPI)
+    DIFF_DATA
+    square_5500x5500.vtu ConstViscosityThermalConvection_pcs_1_ts_149_t_50000000000.000000.vtu T_ref T 1e-16  1.e-16
+    square_5500x5500.vtu ConstViscosityThermalConvection_pcs_1_ts_149_t_50000000000.000000.vtu p_ref p  1e-16  1.e-16
+    VIS ConstViscosityThermalConvection_pcs_1_ts_149_t_50000000000.000000.vtu
+)
+
+# MPI/PETSc tests
 AddTest(
     NAME Parallel_LARGE_2D_ThermalConvection_constviscosity
     PATH Parabolic/HT/ConstViscosity
@@ -30,3 +44,17 @@ AddTest(
     ConstViscosityThermalConvection_pcs_0_ts_149_t_50000000000_000000_2.vtu ConstViscosityThermalConvection_pcs_0_ts_149_t_50000000000_000000_2.vtu T T 1e-15 1e-14
     ConstViscosityThermalConvection_pcs_0_ts_149_t_50000000000_000000_3.vtu ConstViscosityThermalConvection_pcs_0_ts_149_t_50000000000_000000_3.vtu T T 1e-15 1e-14
 )
+
+AddTest(
+    NAME LARGE_2D_ThermalConvection_constviscosityStaggeredScheme
+    PATH Parabolic/HT/StaggeredCoupling/ConstViscosity
+    EXECUTABLE_ARGS square_5500x5500_staggered_scheme.prj
+    WRAPPER mpirun
+    WRAPPER_ARGS -np 1
+    TESTER vtkdiff
+    REQUIREMENTS OGS_USE_MPI
+    DIFF_DATA
+    square_5500x5500.vtu ConstViscosityThermalConvection_pcs_1_ts_149_t_50000000000_000000_0.vtu T_ref T  1e-14  1.e-14
+    square_5500x5500.vtu ConstViscosityThermalConvection_pcs_1_ts_149_t_50000000000_000000_0.vtu p_ref p  1e-14  1.e-14
+    VIS ConstViscosityThermalConvection_pcs_1_ts_149_t_50000000000.000000_0.vtu
+)
diff --git a/ProcessLib/HeatConduction/CreateHeatConductionProcess.cpp b/ProcessLib/HeatConduction/CreateHeatConductionProcess.cpp
index 7488975e8c0317fd07edcdd5601cfcf29ca21955..e3b49f6ec9a62bb2872de1a1d710ae51f8744190 100644
--- a/ProcessLib/HeatConduction/CreateHeatConductionProcess.cpp
+++ b/ProcessLib/HeatConduction/CreateHeatConductionProcess.cpp
@@ -36,10 +36,13 @@ std::unique_ptr<Process> createHeatConductionProcess(
     //! \ogs_file_param{prj__processes__process__HEAT_CONDUCTION__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    auto per_process_variables = findProcessVariables(
         variables, pv_config,
         {//! \ogs_file_param_special{prj__processes__process__HEAT_CONDUCTION__process_variables__process_variable}
          "process_variable"});
+    process_variables.push_back(std::move(per_process_variables));
 
     // thermal conductivity parameter.
     auto& thermal_conductivity = findParameter<double>(
diff --git a/ProcessLib/HeatConduction/HeatConductionFEM-impl.h b/ProcessLib/HeatConduction/HeatConductionFEM-impl.h
deleted file mode 100644
index 3367e05f4b946f868cf484e153575337a9ad50da..0000000000000000000000000000000000000000
--- a/ProcessLib/HeatConduction/HeatConductionFEM-impl.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/**
- * \copyright
- * Copyright (c) 2012-2017, OpenGeoSys Community (http://www.opengeosys.org)
- *            Distributed under a Modified BSD License.
- *              See accompanying file LICENSE.txt or
- *              http://www.opengeosys.org/project/license
- *
- *  \file   HeatConductionFEM-impl.h
- *  Created on January 17, 2017, 3:41 PM
- */
-
-#pragma once
-
-#include "HeatConductionFEM.h"
-#include "NumLib/Function/Interpolation.h"
-
-namespace ProcessLib
-{
-namespace HeatConduction
-{
-template <typename ShapeFunction, typename IntegrationMethod,
-          unsigned GlobalDim>
-template <typename LiquidFlowVelocityCalculator>
-void LocalAssemblerData<ShapeFunction, IntegrationMethod, GlobalDim>::
-    assembleHeatTransportLiquidFlow(
-        double const t, int const material_id, SpatialPosition& pos,
-        int const gravitational_axis_id,
-        double const gravitational_acceleration,
-        Eigen::MatrixXd const& permeability,
-        ProcessLib::LiquidFlow::LiquidFlowMaterialProperties const&
-            liquid_flow_properties,
-        std::vector<double> const& local_x, std::vector<double> const& local_p,
-        std::vector<double>& local_M_data, std::vector<double>& local_K_data)
-{
-    auto const local_matrix_size = local_x.size();
-    // This assertion is valid only if all nodal d.o.f. use the same shape
-    // matrices.
-    assert(local_matrix_size == ShapeFunction::NPOINTS * NUM_NODAL_DOF);
-
-    auto local_M = MathLib::createZeroedMatrix<NodalMatrixType>(
-        local_M_data, local_matrix_size, local_matrix_size);
-    auto local_K = MathLib::createZeroedMatrix<NodalMatrixType>(
-        local_K_data, local_matrix_size, local_matrix_size);
-    const auto local_p_vec =
-        MathLib::toVector<NodalVectorType>(local_p, local_matrix_size);
-
-    unsigned const n_integration_points =
-        _integration_method.getNumberOfPoints();
-
-    const double porosity_variable = 0.;
-    for (unsigned ip = 0; ip < n_integration_points; ip++)
-    {
-        pos.setIntegrationPoint(ip);
-        auto const& sm = _shape_matrices[ip];
-        auto const& wp = _integration_method.getWeightedPoint(ip);
-        double p = 0.;
-        NumLib::shapeFunctionInterpolate(local_p, sm.N, p);
-        double T = 0.;
-        NumLib::shapeFunctionInterpolate(local_x, sm.N, T);
-
-        // Thermal conductivity of solid phase.
-        auto const k_s = _process_data.thermal_conductivity(t, pos)[0];
-        // Specific heat capacity of solid phase.
-        auto const cp_s = _process_data.heat_capacity(t, pos)[0];
-        // Density of solid phase.
-        auto const rho_s = _process_data.density(t, pos)[0];
-
-        // Thermal conductivity of liquid.
-        double const k_f = liquid_flow_properties.getThermalConductivity(p, T);
-        // Specific heat capacity of liquid.
-        double const cp_f = liquid_flow_properties.getHeatCapacity(p, T);
-        // Density of liquid.
-        double const rho_f = liquid_flow_properties.getLiquidDensity(p, T);
-
-        // Porosity of porous media.
-        double const n = liquid_flow_properties.getPorosity(
-            material_id, t, pos, porosity_variable, T);
-
-        // Effective specific heat capacity.
-        double const effective_cp = (1.0 - n) * cp_s * rho_s + n * cp_f * rho_f;
-        // Effective thermal conductivity.
-        double const effective_K = (1.0 - n) * k_s + n * k_f;
-
-        double const integration_factor =
-            sm.detJ * wp.getWeight() * sm.integralMeasure;
-
-        local_M.noalias() +=
-            effective_cp * sm.N.transpose() * sm.N * integration_factor;
-        local_K.noalias() +=
-            sm.dNdx.transpose() * effective_K * sm.dNdx * integration_factor;
-
-        // Compute fluid flow velocity:
-        double const mu =
-            liquid_flow_properties.getViscosity(p, T);  // Viscosity
-        GlobalDimVectorType const& darcy_velocity =
-            LiquidFlowVelocityCalculator::calculateVelocity(
-                local_p_vec, sm, permeability, mu,
-                rho_f * gravitational_acceleration, gravitational_axis_id);
-        // Add the advection term
-        local_K.noalias() += cp_f * rho_f * sm.N.transpose() *
-                             (darcy_velocity.transpose() * sm.dNdx) *
-                             integration_factor;
-    }
-}
-
-template <typename ShapeFunction, typename IntegrationMethod,
-          unsigned GlobalDim>
-void LocalAssemblerData<ShapeFunction, IntegrationMethod, GlobalDim>::
-    assembleWithCoupledTerm(double const t, std::vector<double> const& local_x,
-                            std::vector<double>& local_M_data,
-                            std::vector<double>& local_K_data,
-                            std::vector<double>& /*local_b_data*/,
-                            LocalCoupledSolutions const& coupled_term)
-{
-    for (auto const& coupled_process_pair : coupled_term.coupled_processes)
-    {
-        if (coupled_process_pair.first ==
-            std::type_index(typeid(ProcessLib::LiquidFlow::LiquidFlowProcess)))
-        {
-            assert(
-                dynamic_cast<const ProcessLib::LiquidFlow::LiquidFlowProcess*>(
-                    &(coupled_process_pair.second)) != nullptr);
-
-            auto const& pcs =
-                static_cast<ProcessLib::LiquidFlow::LiquidFlowProcess const&>(
-                    coupled_process_pair.second);
-            const auto liquid_flow_prop = pcs.getLiquidFlowMaterialProperties();
-
-            const auto local_p =
-                coupled_term.local_coupled_xs.at(coupled_process_pair.first);
-
-            SpatialPosition pos;
-            pos.setElementID(_element.getID());
-            const int material_id = liquid_flow_prop->getMaterialID(pos);
-
-            const Eigen::MatrixXd& K = liquid_flow_prop->getPermeability(
-                material_id, t, pos, _element.getDimension());
-
-            if (K.size() == 1)
-            {
-                assembleHeatTransportLiquidFlow<
-                    IsotropicLiquidFlowVelocityCalculator>(
-                    t, material_id, pos, pcs.getGravitationalAxisID(),
-                    pcs.getGravitationalAcceleration(), K, *liquid_flow_prop,
-                    local_x, local_p, local_M_data, local_K_data);
-            }
-            else
-            {
-                assembleHeatTransportLiquidFlow<
-                    AnisotropicLiquidFlowVelocityCalculator>(
-                    t, material_id, pos, pcs.getGravitationalAxisID(),
-                    pcs.getGravitationalAcceleration(), K, *liquid_flow_prop,
-                    local_x, local_p, local_M_data, local_K_data);
-            }
-        }
-        else
-        {
-            OGS_FATAL(
-                "This coupled process is not presented for "
-                "HeatConduction process");
-        }
-    }
-}
-
-}  // namespace HeatConduction
-}  // namespace ProcessLib
diff --git a/ProcessLib/HeatConduction/HeatConductionFEM.h b/ProcessLib/HeatConduction/HeatConductionFEM.h
index 3d61130db91af89fb5f602f1dc8c9f32664ed8d0..3f8d58c5f3ce340cd95b51083451f5666679b035 100644
--- a/ProcessLib/HeatConduction/HeatConductionFEM.h
+++ b/ProcessLib/HeatConduction/HeatConductionFEM.h
@@ -21,10 +21,6 @@
 #include "ProcessLib/Parameter/Parameter.h"
 #include "ProcessLib/Utils/InitShapeMatrices.h"
 
-// For coupling
-#include "ProcessLib/LiquidFlow/LiquidFlowProcess.h"
-#include "ProcessLib/LiquidFlow/LiquidFlowMaterialProperties.h"
-
 namespace ProcessLib
 {
 namespace HeatConduction
@@ -131,12 +127,6 @@ public:
         }
     }
 
-    void assembleWithCoupledTerm(
-        double const t, std::vector<double> const& local_x,
-        std::vector<double>& local_M_data, std::vector<double>& local_K_data,
-        std::vector<double>& /*local_b_data*/,
-        LocalCoupledSolutions const& coupled_term) override;
-
     void computeSecondaryVariableConcrete(
         const double t, std::vector<double> const& local_x) override
     {
@@ -216,83 +206,7 @@ private:
         _shape_matrices;
 
     std::vector<std::vector<double>> _heat_fluxes;
-
-    /**
-     * @brief Assemble local matrices and vectors of the equations of
-     *        heat transport process in porous media with full saturated liquid
-     *        flow.
-     *
-     * @param t                          Time.
-     * @param material_id                Material ID of the element.
-     * @param pos                        Position (t, x) of the element.
-     * @param gravitational_axis_id      The ID of the axis, which indicates the
-     *                                   direction of gravity portion of the
-     *                                   Darcy's velocity.
-     * @param gravitational_acceleration Gravitational acceleration, 9.81 in the
-     *                                   SI unit standard.
-     * @param permeability               Intrinsic permeability of liquid
-     *                                   in porous media.
-     * @param liquid_flow_properties     Liquid flow properties.
-     * @param local_x                    Local vector of unknowns, e.g.
-     *                                   nodal temperatures of the element.
-     * @param local_p                    Local vector of nodal pore pressures.
-     * @param local_M_data               Local mass matrix.
-     * @param local_K_data               Local Laplace matrix.
-     */
-    template <typename LiquidFlowVelocityCalculator>
-    void assembleHeatTransportLiquidFlow(
-        double const t, int const material_id, SpatialPosition& pos,
-        int const gravitational_axis_id,
-        double const gravitational_acceleration,
-        Eigen::MatrixXd const& permeability,
-        ProcessLib::LiquidFlow::LiquidFlowMaterialProperties const&
-            liquid_flow_properties,
-        std::vector<double> const& local_x, std::vector<double> const& local_p,
-        std::vector<double>& local_M_data, std::vector<double>& local_K_data);
-
-    /// Calculator of liquid flow velocity for isotropic permeability
-    /// tensor
-    struct IsotropicLiquidFlowVelocityCalculator
-    {
-        static GlobalDimVectorType calculateVelocity(
-            Eigen::Map<const NodalVectorType> const& local_p,
-            ShapeMatrices const& sm, Eigen::MatrixXd const& permeability,
-            double const mu, double const rho_g,
-            int const gravitational_axis_id)
-        {
-            const double K = permeability(0, 0) / mu;
-            // Compute the velocity
-            GlobalDimVectorType darcy_velocity = -K * sm.dNdx * local_p;
-            // gravity term
-            if (gravitational_axis_id >= 0)
-                darcy_velocity[gravitational_axis_id] -= K * rho_g;
-            return darcy_velocity;
-        }
-    };
-
-    /// Calculator of liquid flow velocity for anisotropic permeability
-    /// tensor
-    struct AnisotropicLiquidFlowVelocityCalculator
-    {
-        static GlobalDimVectorType calculateVelocity(
-            Eigen::Map<const NodalVectorType> const& local_p,
-            ShapeMatrices const& sm, Eigen::MatrixXd const& permeability,
-            double const mu, double const rho_g,
-            int const gravitational_axis_id)
-        {
-            GlobalDimVectorType darcy_velocity =
-                -permeability * sm.dNdx * local_p / mu;
-            if (gravitational_axis_id >= 0)
-            {
-                darcy_velocity.noalias() -=
-                    rho_g * permeability.col(gravitational_axis_id) / mu;
-            }
-            return darcy_velocity;
-        }
-    };
 };
 
 }  // namespace HeatConduction
 }  // namespace ProcessLib
-
-#include "HeatConductionFEM-impl.h"
diff --git a/ProcessLib/HeatConduction/HeatConductionProcess.cpp b/ProcessLib/HeatConduction/HeatConductionProcess.cpp
index 986aeb31e463d99c99515d87ec887939375adb03..cf2e4bc68b2db86afef0049eac0f28e861b053c2 100644
--- a/ProcessLib/HeatConduction/HeatConductionProcess.cpp
+++ b/ProcessLib/HeatConduction/HeatConductionProcess.cpp
@@ -22,7 +22,8 @@ HeatConductionProcess::HeatConductionProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     HeatConductionProcessData&& process_data,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller)
@@ -33,23 +34,6 @@ HeatConductionProcess::HeatConductionProcess(
 {
 }
 
-void HeatConductionProcess::preTimestepConcreteProcess(GlobalVector const& x,
-                                            const double /*t*/,
-                                            const double /*delta_t*/,
-                                            const int /*process_id*/)
-{
-    if (!_x_previous_timestep)
-    {
-        _x_previous_timestep =
-            MathLib::MatrixVectorTraits<GlobalVector>::newInstance(x);
-    }
-    else
-    {
-        auto& x0 = *_x_previous_timestep;
-        MathLib::LinAlg::copy(x, x0);
-    }
-}
-
 void HeatConductionProcess::initializeConcreteProcess(
     NumLib::LocalToGlobalIndexMap const& dof_table,
     MeshLib::Mesh const& mesh,
diff --git a/ProcessLib/HeatConduction/HeatConductionProcess.h b/ProcessLib/HeatConduction/HeatConductionProcess.h
index 3965ef37f5e79ea0be5e4519815868c882999e7a..4d8d2b331250c0614610ee5af703bbf9d5b89ec7 100644
--- a/ProcessLib/HeatConduction/HeatConductionProcess.h
+++ b/ProcessLib/HeatConduction/HeatConductionProcess.h
@@ -27,7 +27,7 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         HeatConductionProcessData&& process_data,
         SecondaryVariableCollection&& secondary_variables,
@@ -41,16 +41,6 @@ public:
     void computeSecondaryVariableConcrete(
         double const t, GlobalVector const& x) override;
 
-    void preTimestepConcreteProcess(GlobalVector const& x, const double t,
-                                    const double delta_t,
-                                    const int process_id) override;
-
-    // Get the solution of the previous time step.
-    GlobalVector* getPreviousTimeStepSolution() const override
-    {
-        return _x_previous_timestep.get();
-    }
-
 private:
     void initializeConcreteProcess(
         NumLib::LocalToGlobalIndexMap const& dof_table,
diff --git a/ProcessLib/HydroMechanics/CreateHydroMechanicsProcess.cpp b/ProcessLib/HydroMechanics/CreateHydroMechanicsProcess.cpp
index 31774e41bdbab2c78a2ba0886beedd5134f110a9..46bffb117a7eb5847078fc370aef1dae63d84fbb 100644
--- a/ProcessLib/HydroMechanics/CreateHydroMechanicsProcess.cpp
+++ b/ProcessLib/HydroMechanics/CreateHydroMechanicsProcess.cpp
@@ -35,40 +35,68 @@ std::unique_ptr<Process> createHydroMechanicsProcess(
     config.checkConfigParameter("type", "HYDRO_MECHANICS");
     DBUG("Create HydroMechanicsProcess.");
 
+    auto const staggered_scheme =
+        //! \ogs_file_param{prj__processes__process__HYDRO_MECHANICS__coupling_scheme}
+        config.getConfigParameterOptional<std::string>("coupling_scheme");
+    const bool use_monolithic_scheme =
+        (staggered_scheme && (*staggered_scheme == "staggered")) ? false : true;
+
     // Process variable.
 
     //! \ogs_file_param{prj__processes__process__HYDRO_MECHANICS__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
-        variables, pv_config,
-        {//! \ogs_file_param_special{prj__processes__process__HYDRO_MECHANICS__process_variables__pressure}
-         "pressure",
-         //! \ogs_file_param_special{prj__processes__process__HYDRO_MECHANICS__process_variables__displacement}
-         "displacement"});
+    ProcessVariable* variable_p;
+    ProcessVariable* variable_u;
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    if (use_monolithic_scheme)  // monolithic scheme.
+    {
+        auto per_process_variables = findProcessVariables(
+            variables, pv_config,
+            {//! \ogs_file_param_special{prj__processes__process__HYDRO_MECHANICS__process_variables__pressure}
+            "pressure",
+            //! \ogs_file_param_special{prj__processes__process__HYDRO_MECHANICS__process_variables__displacement}
+            "displacement"});
+        variable_p = &per_process_variables[0].get();
+        variable_u = &per_process_variables[1].get();
+        process_variables.push_back(std::move(per_process_variables));
+    }
+    else  // staggered scheme.
+    {
+        using namespace std::string_literals;
+        for (auto const& variable_name : {"pressure"s, "displacement"s})
+        {
+            auto per_process_variables =
+                findProcessVariables(variables, pv_config, {variable_name});
+            process_variables.push_back(std::move(per_process_variables));
+        }
+        variable_p = &process_variables[0][0].get();
+        variable_u = &process_variables[1][0].get();
+    }
 
     DBUG("Associate displacement with process variable \'%s\'.",
-         process_variables[1].get().getName().c_str());
+         variable_u->getName().c_str());
 
-    if (process_variables[1].get().getNumberOfComponents() != DisplacementDim)
+    if (variable_u->getNumberOfComponents() != DisplacementDim)
     {
         OGS_FATAL(
             "Number of components of the process variable '%s' is different "
             "from the displacement dimension: got %d, expected %d",
-            process_variables[1].get().getName().c_str(),
-            process_variables[1].get().getNumberOfComponents(),
+            variable_u->getName().c_str(),
+            variable_u->getNumberOfComponents(),
             DisplacementDim);
     }
 
     DBUG("Associate pressure with process variable \'%s\'.",
-         process_variables[0].get().getName().c_str());
-    if (process_variables[0].get().getNumberOfComponents() != 1)
+         variable_p->getName().c_str());
+    if (variable_p->getNumberOfComponents() != 1)
     {
         OGS_FATAL(
             "Pressure process variable '%s' is not a scalar variable but has "
             "%d components.",
-            process_variables[0].get().getName().c_str(),
-            process_variables[0].get().getNumberOfComponents());
+            variable_p->getName().c_str(),
+            variable_p->getNumberOfComponents());
     }
 
     // Constitutive relation.
@@ -190,7 +218,8 @@ std::unique_ptr<Process> createHydroMechanicsProcess(
     return std::make_unique<HydroMechanicsProcess<DisplacementDim>>(
         mesh, std::move(jacobian_assembler), parameters, integration_order,
         std::move(process_variables), std::move(process_data),
-        std::move(secondary_variables), std::move(named_function_caller));
+        std::move(secondary_variables), std::move(named_function_caller),
+        use_monolithic_scheme);
 }
 
 template std::unique_ptr<Process> createHydroMechanicsProcess<2>(
diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess-impl.h b/ProcessLib/HydroMechanics/HydroMechanicsProcess-impl.h
index 8f6ebb41846ef000230db7cde3a0198ed787ecb8..e2be230ccadcdbd6e6f9ed0d7d0e872353728234 100644
--- a/ProcessLib/HydroMechanics/HydroMechanicsProcess-impl.h
+++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess-impl.h
@@ -28,13 +28,16 @@ HydroMechanicsProcess<DisplacementDim>::HydroMechanicsProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     HydroMechanicsProcessData<DisplacementDim>&& process_data,
     SecondaryVariableCollection&& secondary_variables,
-    NumLib::NamedFunctionCaller&& named_function_caller)
+    NumLib::NamedFunctionCaller&& named_function_caller,
+    bool const use_monolithic_scheme)
     : Process(mesh, std::move(jacobian_assembler), parameters,
               integration_order, std::move(process_variables),
-              std::move(secondary_variables), std::move(named_function_caller)),
+              std::move(secondary_variables), std::move(named_function_caller),
+              use_monolithic_scheme),
       _process_data(std::move(process_data))
 {
 }
diff --git a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h
index 3197f0524e9a8307102ac7b5304bff70e23428a9..bae5bb8a29d8cc2dedd22b24a8c2cdf694941194 100644
--- a/ProcessLib/HydroMechanics/HydroMechanicsProcess.h
+++ b/ProcessLib/HydroMechanics/HydroMechanicsProcess.h
@@ -35,11 +35,12 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         HydroMechanicsProcessData<DisplacementDim>&& process_data,
         SecondaryVariableCollection&& secondary_variables,
-        NumLib::NamedFunctionCaller&& named_function_caller);
+        NumLib::NamedFunctionCaller&& named_function_caller,
+        bool const use_monolithic_scheme);
 
     //! \name ODESystem interface
     //! @{
diff --git a/ProcessLib/LIE/HydroMechanics/CreateHydroMechanicsProcess.cpp b/ProcessLib/LIE/HydroMechanics/CreateHydroMechanicsProcess.cpp
index 2379a87c6564842bf13c4869f0849c9c809d7e71..eb08b52f095a072364604b5678cd901d81e3ff37 100644
--- a/ProcessLib/LIE/HydroMechanics/CreateHydroMechanicsProcess.cpp
+++ b/ProcessLib/LIE/HydroMechanics/CreateHydroMechanicsProcess.cpp
@@ -39,6 +39,11 @@ std::unique_ptr<Process> createHydroMechanicsProcess(
     //! \ogs_file_param{prj__processes__process__type}
     config.checkConfigParameter("type", "HYDRO_MECHANICS_WITH_LIE");
     DBUG("Create HydroMechanicsProcess with LIE.");
+    auto const staggered_scheme =
+        //! \ogs_file_param{prj__processes__process__HYDRO_MECHANICS_WITH_LIE__coupling_scheme}
+        config.getConfigParameterOptional<std::string>("coupling_scheme");
+    const bool use_monolithic_scheme =
+        (staggered_scheme && (*staggered_scheme == "staggered")) ? false : true;
 
     // Process variables
     //! \ogs_file_param{prj__processes__process__HYDRO_MECHANICS_WITH_LIE__process_variables}
@@ -46,7 +51,11 @@ std::unique_ptr<Process> createHydroMechanicsProcess(
     auto range =
         //! \ogs_file_param{prj__processes__process__HYDRO_MECHANICS_WITH_LIE__process_variables__process_variable}
         pv_conf.getConfigParameterList<std::string>("process_variable");
-    std::vector<std::reference_wrapper<ProcessVariable>> process_variables;
+    std::vector<std::reference_wrapper<ProcessVariable>> p_u_process_variables;
+    std::vector<std::reference_wrapper<ProcessVariable>> p_process_variables;
+    std::vector<std::reference_wrapper<ProcessVariable>> u_process_variables;
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
     for (std::string const& pv_name : range)
     {
         if (pv_name != "pressure" && pv_name != "displacement" &&
@@ -84,12 +93,36 @@ std::unique_ptr<Process> createHydroMechanicsProcess(
                 GlobalDim);
         }
 
-        process_variables.emplace_back(const_cast<ProcessVariable&>(*variable));
+        if (!use_monolithic_scheme)
+        {
+            if (pv_name == "pressure")
+                p_process_variables.emplace_back(
+                    const_cast<ProcessVariable&>(*variable));
+            else
+            {
+                u_process_variables.emplace_back(
+                    const_cast<ProcessVariable&>(*variable));
+            }
+        }
+        else
+        {
+            p_u_process_variables.emplace_back(
+                const_cast<ProcessVariable&>(*variable));
+        }
     }
 
-    if (process_variables.size() > 3)
+    if (p_u_process_variables.size() > 3 || u_process_variables.size() > 2)
         OGS_FATAL("Currently only one displacement jump is supported");
 
+    if (!use_monolithic_scheme)
+    {
+        process_variables.push_back(std::move(p_process_variables));
+        process_variables.push_back(std::move(u_process_variables));
+    }
+    else
+        process_variables.push_back(std::move(p_u_process_variables));
+
+
     // Constitutive relation.
     // read type;
     auto const constitutive_relation_config =
@@ -301,7 +334,8 @@ std::unique_ptr<Process> createHydroMechanicsProcess(
     return std::make_unique<HydroMechanicsProcess<GlobalDim>>(
         mesh, std::move(jacobian_assembler), parameters, integration_order,
         std::move(process_variables), std::move(process_data),
-        std::move(secondary_variables), std::move(named_function_caller));
+        std::move(secondary_variables), std::move(named_function_caller),
+        use_monolithic_scheme);
 }
 
 template std::unique_ptr<Process> createHydroMechanicsProcess<2>(
diff --git a/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.cpp b/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.cpp
index 10cf6ac5b974aa6b68da0b62656a8e21a94de4bb..5d3ff9fd3d025bd5901479f0daebda2c4c8212a9 100644
--- a/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.cpp
+++ b/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.cpp
@@ -38,13 +38,15 @@ HydroMechanicsProcess<GlobalDim>::HydroMechanicsProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&& process_variables,
     HydroMechanicsProcessData<GlobalDim>&& process_data,
     SecondaryVariableCollection&& secondary_variables,
-    NumLib::NamedFunctionCaller&& named_function_caller)
+    NumLib::NamedFunctionCaller&& named_function_caller,
+    bool const use_monolithic_scheme)
     : Process(mesh, std::move(jacobian_assembler), parameters,
               integration_order, std::move(process_variables),
-              std::move(secondary_variables), std::move(named_function_caller)),
+              std::move(secondary_variables), std::move(named_function_caller),
+              use_monolithic_scheme),
       _process_data(std::move(process_data))
 {
     INFO("[LIE/HM] looking for fracture elements in the given mesh");
diff --git a/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.h b/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.h
index 8296526de0de829f4654494986b6f6736d527b08..5c5f47330f8b5e75f3fb1ab7584dcea8438a22da 100644
--- a/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.h
+++ b/ProcessLib/LIE/HydroMechanics/HydroMechanicsProcess.h
@@ -38,11 +38,12 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         HydroMechanicsProcessData<GlobalDim>&& process_data,
         SecondaryVariableCollection&& secondary_variables,
-        NumLib::NamedFunctionCaller&& named_function_caller);
+        NumLib::NamedFunctionCaller&& named_function_caller,
+        bool const use_monolithic_scheme);
 
     //! \name ODESystem interface
     //! @{
diff --git a/ProcessLib/LIE/SmallDeformation/CreateSmallDeformationProcess.cpp b/ProcessLib/LIE/SmallDeformation/CreateSmallDeformationProcess.cpp
index 94dbf3d18c498e4c3e1bc6bc839b6f76494492ec..972d33dc3b89824d15bcc0db8f13e2b01ec8e07d 100644
--- a/ProcessLib/LIE/SmallDeformation/CreateSmallDeformationProcess.cpp
+++ b/ProcessLib/LIE/SmallDeformation/CreateSmallDeformationProcess.cpp
@@ -46,7 +46,8 @@ std::unique_ptr<Process> createSmallDeformationProcess(
     auto range =
         //! \ogs_file_param{prj__processes__process__SMALL_DEFORMATION_WITH_LIE__process_variables__process_variable}
         pv_conf.getConfigParameterList<std::string>("process_variable");
-    std::vector<std::reference_wrapper<ProcessVariable>> process_variables;
+    std::vector<std::reference_wrapper<ProcessVariable>> per_process_variables;
+
     for (std::string const& pv_name : range)
     {
         if (pv_name != "displacement" && pv_name.find("displacement_jump") != 0)
@@ -71,27 +72,30 @@ std::unique_ptr<Process> createSmallDeformationProcess(
         DBUG("Found process variable \'%s\' for config tag <%s>.",
              variable->getName().c_str(), "process_variable");
 
-        process_variables.emplace_back(const_cast<ProcessVariable&>(*variable));
+        per_process_variables.emplace_back(const_cast<ProcessVariable&>(*variable));
     }
-    auto const n_fractures = process_variables.size() - 1;
+    auto const n_fractures = per_process_variables.size() - 1;
     if (n_fractures < 1)
     {
         OGS_FATAL("No displacement jump variables are specified");
     }
 
     DBUG("Associate displacement with process variable \'%s\'.",
-         process_variables.back().get().getName().c_str());
+         per_process_variables.back().get().getName().c_str());
 
-    if (process_variables.back().get().getNumberOfComponents() !=
+    if (per_process_variables.back().get().getNumberOfComponents() !=
         DisplacementDim)
     {
         OGS_FATAL(
             "Number of components of the process variable '%s' is different "
             "from the displacement dimension: got %d, expected %d",
-            process_variables.back().get().getName().c_str(),
-            process_variables.back().get().getNumberOfComponents(),
+            per_process_variables.back().get().getName().c_str(),
+            per_process_variables.back().get().getNumberOfComponents(),
             DisplacementDim);
     }
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    process_variables.push_back(std::move(per_process_variables));
 
     // Constitutive relation.
     // read type;
diff --git a/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.cpp b/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.cpp
index 4a2c7b8cbb2a32f844cffb896820cac2b590e851..b84c82b5fba2c26726dd718a27ba00072b0a39ad 100644
--- a/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.cpp
+++ b/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.cpp
@@ -31,16 +31,20 @@ namespace SmallDeformation
 template <int DisplacementDim>
 SmallDeformationProcess<DisplacementDim>::SmallDeformationProcess(
     MeshLib::Mesh& mesh,
-    std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
+    std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&&
+        jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     SmallDeformationProcessData<DisplacementDim>&& process_data,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller)
     : Process(mesh, std::move(jacobian_assembler), parameters,
-              integration_order, std::move(process_variables),
-              std::move(secondary_variables), std::move(named_function_caller)),
+              integration_order,
+              std::move(process_variables),
+              std::move(secondary_variables),
+              std::move(named_function_caller)),
       _process_data(std::move(process_data))
 {
     getFractureMatrixDataInMesh(mesh,
diff --git a/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.h b/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.h
index f8b7f076eae1a3b5e106ad4c5f81640704744add..55c2f0ee97bda6a1f0629db00835d680c2f881ca 100644
--- a/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.h
+++ b/ProcessLib/LIE/SmallDeformation/SmallDeformationProcess.h
@@ -36,7 +36,7 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         SmallDeformationProcessData<DisplacementDim>&& process_data,
         SecondaryVariableCollection&& secondary_variables,
diff --git a/ProcessLib/LiquidFlow/CreateLiquidFlowMaterialProperties.cpp b/ProcessLib/LiquidFlow/CreateLiquidFlowMaterialProperties.cpp
index 5ac06dc06d76cd11cd6a2d5cfdc12bea665a6f47..5ece9fa1970086d1366d82b73741ba4c1998800a 100644
--- a/ProcessLib/LiquidFlow/CreateLiquidFlowMaterialProperties.cpp
+++ b/ProcessLib/LiquidFlow/CreateLiquidFlowMaterialProperties.cpp
@@ -18,6 +18,7 @@
 #include "MeshLib/PropertyVector.h"
 
 #include "MaterialLib/Fluid/FluidProperty.h"
+#include "MaterialLib/PorousMedium/Permeability/Permeability.h"
 #include "MaterialLib/PorousMedium/Porosity/Porosity.h"
 #include "MaterialLib/PorousMedium/Storage/Storage.h"
 #include "MaterialLib/Fluid/FluidProperties/CreateFluidProperties.h"
@@ -26,7 +27,6 @@
 #include "MaterialLib/PorousMedium/PorousPropertyHeaders.h"
 
 #include "ProcessLib/Utils/ProcessUtils.h"
-#include "ProcessLib/Parameter/ConstantParameter.h"
 
 #include "LiquidFlowMaterialProperties.h"
 
@@ -97,32 +97,10 @@ createLiquidFlowMaterialProperties(
     BaseLib::reorderVector(porosity_models, mat_ids);
     BaseLib::reorderVector(storage_models, mat_ids);
 
-    //! \ogs_file_param{prj__processes__process__LIQUID_FLOW__material_property__solid}
-    auto const solid_config = config.getConfigSubtreeOptional("solid");
-    if (solid_config)
-    {
-        auto& solid_thermal_expansion = findParameter<double>(
-            //! \ogs_file_param_special{prj__processes__process__LIQUID_FLOW__material_property__solid__thermal_expansion}
-            *solid_config, "thermal_expansion", parameters, 1);
-        DBUG("Use \'%s\' as solid thermal expansion.",
-             solid_thermal_expansion.name.c_str());
-        auto& biot_constant = findParameter<double>(
-            //! \ogs_file_param_special{prj__processes__process__LIQUID_FLOW__material_property__solid__biot_constant}
-            *solid_config, "biot_constant", parameters, 1);
-        return std::make_unique<LiquidFlowMaterialProperties>(
-            std::move(fluid_properties),
-            std::move(intrinsic_permeability_models),
-            std::move(porosity_models), std::move(storage_models),
-            has_material_ids, material_ids, solid_thermal_expansion,
-            biot_constant);
-    }
-
-    ConstantParameter<double> void_parameter("void_solid_thermal_expansion",
-                                             0.);
     return std::make_unique<LiquidFlowMaterialProperties>(
         std::move(fluid_properties), std::move(intrinsic_permeability_models),
         std::move(porosity_models), std::move(storage_models), has_material_ids,
-        material_ids, void_parameter, void_parameter);
+        material_ids);
 }
 
 }  // end of namespace
diff --git a/ProcessLib/LiquidFlow/CreateLiquidFlowProcess.cpp b/ProcessLib/LiquidFlow/CreateLiquidFlowProcess.cpp
index 01c570cbe7c8f4413596a046608965a7fe9136ea..f242237a328d9a99a2b68b180c2eabe6d893463d 100644
--- a/ProcessLib/LiquidFlow/CreateLiquidFlowProcess.cpp
+++ b/ProcessLib/LiquidFlow/CreateLiquidFlowProcess.cpp
@@ -43,10 +43,13 @@ std::unique_ptr<Process> createLiquidFlowProcess(
     //! \ogs_file_param{prj__processes__process__LIQUID_FLOW__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
+    auto per_process_variables = findProcessVariables(
         variables, pv_config,
         {//! \ogs_file_param_special{prj__processes__process__LIQUID_FLOW__process_variables__process_variable}
          "process_variable"});
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    process_variables.push_back(std::move(per_process_variables));
 
     SecondaryVariableCollection secondary_variables;
 
diff --git a/ProcessLib/LiquidFlow/LiquidFlowLocalAssembler-impl.h b/ProcessLib/LiquidFlow/LiquidFlowLocalAssembler-impl.h
index 6e6c923c5ddfddba549ec1adb416fd709275b93d..4a52debc71c0c7c0f2b56c1ba1c9207cea3e9f30 100644
--- a/ProcessLib/LiquidFlow/LiquidFlowLocalAssembler-impl.h
+++ b/ProcessLib/LiquidFlow/LiquidFlowLocalAssembler-impl.h
@@ -16,10 +16,6 @@
 
 #include "NumLib/Function/Interpolation.h"
 
-#include "ProcessLib/CoupledSolutionsForStaggeredScheme.h"
-
-#include "ProcessLib/HeatConduction/HeatConductionProcess.h"
-
 namespace ProcessLib
 {
 namespace LiquidFlow
@@ -53,58 +49,6 @@ void LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
             pos, permeability);
 }
 
-template <typename ShapeFunction, typename IntegrationMethod,
-          unsigned GlobalDim>
-void LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
-    assembleWithCoupledTerm(double const t, std::vector<double> const& local_x,
-                            std::vector<double>& local_M_data,
-                            std::vector<double>& local_K_data,
-                            std::vector<double>& local_b_data,
-                            LocalCoupledSolutions const& coupled_term)
-{
-    SpatialPosition pos;
-    pos.setElementID(_element.getID());
-    const int material_id = _material_properties.getMaterialID(pos);
-
-    const Eigen::MatrixXd& permeability = _material_properties.getPermeability(
-        material_id, t, pos, _element.getDimension());
-    // Note: For Inclined 1D in 2D/3D or 2D element in 3D, the first item in
-    //  the assert must be changed to permeability.rows() ==
-    //  _element->getDimension()
-    assert(permeability.rows() == GlobalDim || permeability.rows() == 1);
-
-    const double dt = coupled_term.dt;
-    for (auto const& coupled_process_pair : coupled_term.coupled_processes)
-    {
-        if (coupled_process_pair.first ==
-            std::type_index(
-                typeid(ProcessLib::HeatConduction::HeatConductionProcess)))
-        {
-            const auto local_T0 =
-                coupled_term.local_coupled_xs0.at(coupled_process_pair.first);
-            const auto local_T1 =
-                coupled_term.local_coupled_xs.at(coupled_process_pair.first);
-
-            if (permeability.size() == 1)  // isotropic or 1D problem.
-                assembleWithCoupledWithHeatTransport<IsotropicCalculator>(
-                    material_id, t, dt, local_x, local_T0, local_T1,
-                    local_M_data, local_K_data, local_b_data, pos,
-                    permeability);
-            else
-                assembleWithCoupledWithHeatTransport<AnisotropicCalculator>(
-                    material_id, t, dt, local_x, local_T0, local_T1,
-                    local_M_data, local_K_data, local_b_data, pos,
-                    permeability);
-        }
-        else
-        {
-            OGS_FATAL(
-                "This coupled process is not presented for "
-                "LiquidFlow process");
-        }
-    }
-}
-
 template <typename ShapeFunction, typename IntegrationMethod,
           unsigned GlobalDim>
 template <typename LaplacianGravityVelocityCalculator>
@@ -166,81 +110,6 @@ void LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
     }
 }
 
-template <typename ShapeFunction, typename IntegrationMethod,
-          unsigned GlobalDim>
-template <typename LaplacianGravityVelocityCalculator>
-void LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
-    assembleWithCoupledWithHeatTransport(
-        const int material_id, double const t, double const dt,
-        std::vector<double> const& local_x, std::vector<double> const& local_T0,
-        std::vector<double> const& local_T1, std::vector<double>& local_M_data,
-        std::vector<double>& local_K_data, std::vector<double>& local_b_data,
-        SpatialPosition const& pos, Eigen::MatrixXd const& permeability)
-{
-    auto const local_matrix_size = local_x.size();
-    assert(local_matrix_size == ShapeFunction::NPOINTS);
-
-    auto local_M = MathLib::createZeroedMatrix<NodalMatrixType>(
-        local_M_data, local_matrix_size, local_matrix_size);
-    auto local_K = MathLib::createZeroedMatrix<NodalMatrixType>(
-        local_K_data, local_matrix_size, local_matrix_size);
-    auto local_b = MathLib::createZeroedVector<NodalVectorType>(
-        local_b_data, local_matrix_size);
-
-    unsigned const n_integration_points =
-        _integration_method.getNumberOfPoints();
-
-    // TODO: The following two variables should be calculated inside the
-    //       the integration loop for non-constant porosity and storage models.
-    double porosity_variable = 0.;
-    double storage_variable = 0.;
-    for (unsigned ip = 0; ip < n_integration_points; ip++)
-    {
-        auto const& sm = _shape_matrices[ip];
-        auto const& wp = _integration_method.getWeightedPoint(ip);
-
-        double p = 0.;
-        NumLib::shapeFunctionInterpolate(local_x, sm.N, p);
-        double T0 = 0.;
-        NumLib::shapeFunctionInterpolate(local_T0, sm.N, T0);
-        double T = 0.;
-        NumLib::shapeFunctionInterpolate(local_T1, sm.N, T);
-
-        const double integration_factor =
-            sm.integralMeasure * sm.detJ * wp.getWeight();
-
-        // Assemble mass matrix, M
-        local_M.noalias() += _material_properties.getMassCoefficient(
-                                 material_id, t, pos, porosity_variable,
-                                 storage_variable, p, T) *
-                             sm.N.transpose() * sm.N * integration_factor;
-
-        // Compute density:
-        const double rho = _material_properties.getLiquidDensity(p, T);
-        const double rho_g = rho * _gravitational_acceleration;
-
-        // Compute viscosity:
-        const double mu = _material_properties.getViscosity(p, T);
-
-        // Assemble Laplacian, K, and RHS by the gravitational term
-        LaplacianGravityVelocityCalculator::calculateLaplacianAndGravityTerm(
-            local_K, local_b, sm, permeability, integration_factor, mu, rho_g,
-            _gravitational_axis_id);
-
-        // Add the thermal expansion term
-        auto const solid_thermal_expansion =
-            _material_properties.getSolidThermalExpansion(t, pos);
-        auto const biot_constant = _material_properties.getBiotConstant(t, pos);
-        auto const porosity = _material_properties.getPorosity(
-            material_id, t, pos, porosity_variable, T);
-        const double eff_thermal_expansion =
-            3.0 * (biot_constant - porosity) * solid_thermal_expansion -
-            porosity * _material_properties.getdLiquidDensity_dT(p, T) / rho;
-        local_b.noalias() +=
-            eff_thermal_expansion * (T - T0) * integration_factor * sm.N / dt;
-    }
-}
-
 template <typename ShapeFunction, typename IntegrationMethod,
           unsigned GlobalDim>
 std::vector<double> const&
@@ -268,40 +137,13 @@ LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
     //  the assert must be changed to perm.rows() == _element->getDimension()
     assert(permeability.rows() == GlobalDim || permeability.rows() == 1);
 
-    if (!_coupled_solutions)
-    {
-        computeDarcyVelocity(permeability, local_x, veloctiy_cache_vectors);
-    }
-    else
-    {
-        auto const local_coupled_xs =
-            getCurrentLocalSolutionsOfCoupledProcesses(
-                _coupled_solutions->coupled_xs, indices);
-        if (local_coupled_xs.empty())
-            computeDarcyVelocity(permeability, local_x, veloctiy_cache_vectors);
-        else
-            computeDarcyVelocityWithCoupling(permeability, local_x,
-                                             local_coupled_xs,
-                                             veloctiy_cache_vectors);
-    }
-
-    return veloctiy_cache;
-}
-
-template <typename ShapeFunction, typename IntegrationMethod,
-          unsigned GlobalDim>
-void LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
-    computeDarcyVelocity(
-        Eigen::MatrixXd const& permeability,
-        std::vector<double> const& local_x,
-        MatrixOfVelocityAtIntegrationPoints& darcy_velocity_at_ips) const
-{
     if (permeability.size() == 1)  // isotropic or 1D problem.
         computeDarcyVelocityLocal<IsotropicCalculator>(local_x, permeability,
-                                                       darcy_velocity_at_ips);
+                                                       veloctiy_cache_vectors);
     else
-        computeDarcyVelocityLocal<AnisotropicCalculator>(local_x, permeability,
-                                                         darcy_velocity_at_ips);
+        computeDarcyVelocityLocal<AnisotropicCalculator>(
+            local_x, permeability, veloctiy_cache_vectors);
+    return veloctiy_cache;
 }
 
 template <typename ShapeFunction, typename IntegrationMethod,
@@ -341,65 +183,6 @@ void LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
     }
 }
 
-template <typename ShapeFunction, typename IntegrationMethod,
-          unsigned GlobalDim>
-void LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
-    computeDarcyVelocityWithCoupling(
-        Eigen::MatrixXd const& permeability, std::vector<double> const& local_x,
-        std::unordered_map<std::type_index, const std::vector<double>> const&
-            coupled_local_solutions,
-        MatrixOfVelocityAtIntegrationPoints& darcy_velocity_at_ips) const
-{
-    const auto local_T = coupled_local_solutions.at(std::type_index(
-        typeid(ProcessLib::HeatConduction::HeatConductionProcess)));
-
-    if (permeability.size() == 1)  // isotropic or 1D problem.
-        computeDarcyVelocityCoupledWithHeatTransportLocal<IsotropicCalculator>(
-            local_x, local_T, permeability, darcy_velocity_at_ips);
-    else
-        computeDarcyVelocityCoupledWithHeatTransportLocal<
-            AnisotropicCalculator>(local_x, local_T, permeability,
-                                   darcy_velocity_at_ips);
-}
-
-template <typename ShapeFunction, typename IntegrationMethod,
-          unsigned GlobalDim>
-template <typename LaplacianGravityVelocityCalculator>
-void LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
-    computeDarcyVelocityCoupledWithHeatTransportLocal(
-        std::vector<double> const& local_x,
-        std::vector<double> const& local_T,
-        Eigen::MatrixXd const& permeability,
-        MatrixOfVelocityAtIntegrationPoints& darcy_velocity_at_ips) const
-{
-    auto const local_matrix_size = local_x.size();
-    assert(local_matrix_size == ShapeFunction::NPOINTS);
-
-    const auto local_p_vec =
-        MathLib::toVector<NodalVectorType>(local_x, local_matrix_size);
-
-    unsigned const n_integration_points =
-        _integration_method.getNumberOfPoints();
-
-    for (unsigned ip = 0; ip < n_integration_points; ip++)
-    {
-        auto const& sm = _shape_matrices[ip];
-        double p = 0.;
-        NumLib::shapeFunctionInterpolate(local_x, sm.N, p);
-        double T = 0.;
-        NumLib::shapeFunctionInterpolate(local_T, sm.N, T);
-
-        const double rho_g = _material_properties.getLiquidDensity(p, T) *
-                             _gravitational_acceleration;
-        // Compute viscosity:
-        const double mu = _material_properties.getViscosity(p, T);
-
-        LaplacianGravityVelocityCalculator::calculateVelocity(
-            ip, local_p_vec, sm, permeability, mu, rho_g,
-            _gravitational_axis_id, darcy_velocity_at_ips);
-    }
-}
-
 template <typename ShapeFunction, typename IntegrationMethod,
           unsigned GlobalDim>
 void LiquidFlowLocalAssembler<ShapeFunction, IntegrationMethod, GlobalDim>::
diff --git a/ProcessLib/LiquidFlow/LiquidFlowLocalAssembler.h b/ProcessLib/LiquidFlow/LiquidFlowLocalAssembler.h
index a5b29cf92816f9b4157722360e2c9abcc6ad112b..d56ff9496d9e0b9fd9572241eac3a154652d87ed 100644
--- a/ProcessLib/LiquidFlow/LiquidFlowLocalAssembler.h
+++ b/ProcessLib/LiquidFlow/LiquidFlowLocalAssembler.h
@@ -31,7 +31,6 @@
 
 namespace ProcessLib
 {
-struct CoupledSolutionsForStaggeredScheme;
 
 namespace LiquidFlow
 {
@@ -42,28 +41,11 @@ class LiquidFlowLocalAssemblerInterface
       public NumLib::ExtrapolatableElement
 {
 public:
-    LiquidFlowLocalAssemblerInterface(
-        CoupledSolutionsForStaggeredScheme* const coupled_solutions)
-        : _coupled_solutions(coupled_solutions)
-    {
-    }
-
-    void setCoupledSolutionsForStaggeredScheme(
-        std::size_t const /*mesh_item_id*/,
-        CoupledSolutionsForStaggeredScheme* const coupled_solutions)
-    {
-        _coupled_solutions = coupled_solutions;
-    }
-
     virtual std::vector<double> const& getIntPtDarcyVelocity(
         const double /*t*/,
         GlobalVector const& /*current_solution*/,
         NumLib::LocalToGlobalIndexMap const& /*dof_table*/,
         std::vector<double>& /*cache*/) const = 0;
-
-protected:
-    /// Pointer that is set from a Process class.
-    CoupledSolutionsForStaggeredScheme* _coupled_solutions;
 };
 
 template <typename ShapeFunction, typename IntegrationMethod,
@@ -92,10 +74,8 @@ public:
         int const gravitational_axis_id,
         double const gravitational_acceleration,
         double const reference_temperature,
-        LiquidFlowMaterialProperties const& material_propertries,
-        CoupledSolutionsForStaggeredScheme* coupled_solutions)
-        : LiquidFlowLocalAssemblerInterface(coupled_solutions),
-          _element(element),
+        LiquidFlowMaterialProperties const& material_propertries)
+        : _element(element),
           _integration_method(integration_order),
           _shape_matrices(initShapeMatrices<ShapeFunction, ShapeMatricesType,
                                             IntegrationMethod, GlobalDim>(
@@ -112,12 +92,6 @@ public:
                   std::vector<double>& local_K_data,
                   std::vector<double>& local_b_data) override;
 
-    void assembleWithCoupledTerm(
-        double const t, std::vector<double> const& local_x,
-        std::vector<double>& local_M_data, std::vector<double>& local_K_data,
-        std::vector<double>& local_b_data,
-        LocalCoupledSolutions const& coupled_term) override;
-
     Eigen::Map<const Eigen::RowVectorXd> getShapeMatrix(
         const unsigned integration_point) const override
     {
@@ -191,38 +165,12 @@ private:
                                  SpatialPosition const& pos,
                                  Eigen::MatrixXd const& permeability);
 
-    template <typename LaplacianGravityVelocityCalculator>
-    void assembleWithCoupledWithHeatTransport(
-        const int material_id, double const t, double const dt,
-        std::vector<double> const& local_x, std::vector<double> const& local_T0,
-        std::vector<double> const& local_T1, std::vector<double>& local_M_data,
-        std::vector<double>& local_K_data, std::vector<double>& local_b_data,
-        SpatialPosition const& pos, Eigen::MatrixXd const& permeability);
-
-    void computeDarcyVelocity(
-        Eigen::MatrixXd const& permeability,
-        std::vector<double> const& local_x,
-        MatrixOfVelocityAtIntegrationPoints& darcy_velocity_at_ips) const;
-
-    void computeDarcyVelocityWithCoupling(
-        Eigen::MatrixXd const& permeability, std::vector<double> const& local_x,
-        std::unordered_map<std::type_index, const std::vector<double>> const&
-            coupled_local_solutions,
-        MatrixOfVelocityAtIntegrationPoints& darcy_velocity_at_ips) const;
-
     template <typename LaplacianGravityVelocityCalculator>
     void computeDarcyVelocityLocal(
         std::vector<double> const& local_x,
         Eigen::MatrixXd const& permeability,
         MatrixOfVelocityAtIntegrationPoints& darcy_velocity_at_ips) const;
 
-    template <typename LaplacianGravityVelocityCalculator>
-    void computeDarcyVelocityCoupledWithHeatTransportLocal(
-        std::vector<double> const& local_x,
-        std::vector<double> const& local_T,
-        Eigen::MatrixXd const& permeability,
-        MatrixOfVelocityAtIntegrationPoints& darcy_velocity_at_ips) const;
-
     const int _gravitational_axis_id;
     const double _gravitational_acceleration;
     const double _reference_temperature;
diff --git a/ProcessLib/LiquidFlow/LiquidFlowMaterialProperties.cpp b/ProcessLib/LiquidFlow/LiquidFlowMaterialProperties.cpp
index a6297371edcad2104c9f09404e4565d157c77965..d6748d70adf92ca102d42129d38103592fc6f707 100644
--- a/ProcessLib/LiquidFlow/LiquidFlowMaterialProperties.cpp
+++ b/ProcessLib/LiquidFlow/LiquidFlowMaterialProperties.cpp
@@ -18,10 +18,9 @@
 
 #include "MeshLib/PropertyVector.h"
 
-#include "ProcessLib/Parameter/Parameter.h"
 #include "ProcessLib/Parameter/SpatialPosition.h"
 
-#include "MaterialLib/Fluid/FluidProperty.h"
+#include "MaterialLib/PorousMedium/Permeability/Permeability.h"
 #include "MaterialLib/PorousMedium/Porosity/Porosity.h"
 #include "MaterialLib/PorousMedium/Storage/Storage.h"
 
@@ -54,17 +53,6 @@ double LiquidFlowMaterialProperties::getLiquidDensity(const double p,
         MaterialLib::Fluid::FluidPropertyType::Density, vars);
 }
 
-double LiquidFlowMaterialProperties::getdLiquidDensity_dT(const double p,
-                                                          const double T) const
-{
-    ArrayType vars;
-    vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::T)] = T;
-    vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::p)] = p;
-    return _fluid_properties->getdValue(
-        MaterialLib::Fluid::FluidPropertyType::Density, vars,
-        MaterialLib::Fluid::PropertyVariableType::T);
-}
-
 double LiquidFlowMaterialProperties::getViscosity(const double p,
                                                   const double T) const
 {
@@ -75,24 +63,14 @@ double LiquidFlowMaterialProperties::getViscosity(const double p,
         MaterialLib::Fluid::FluidPropertyType::Viscosity, vars);
 }
 
-double LiquidFlowMaterialProperties::getHeatCapacity(const double p,
-                                                     const double T) const
-{
-    ArrayType vars;
-    vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::T)] = T;
-    vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::p)] = p;
-    return _fluid_properties->getValue(
-        MaterialLib::Fluid::FluidPropertyType::HeatCapacity, vars);
-}
-
-double LiquidFlowMaterialProperties::getThermalConductivity(
-    const double p, const double T) const
+double LiquidFlowMaterialProperties::getPorosity(const int material_id,
+                                                 const double t,
+                                                 const SpatialPosition& pos,
+                                                 const double porosity_variable,
+                                                 const double T) const
 {
-    ArrayType vars;
-    vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::T)] = T;
-    vars[static_cast<int>(MaterialLib::Fluid::PropertyVariableType::p)] = p;
-    return _fluid_properties->getValue(
-        MaterialLib::Fluid::FluidPropertyType::ThermalConductivity, vars);
+    return _porosity_models[material_id]->getValue(t, pos, porosity_variable,
+                                                   T);
 }
 
 double LiquidFlowMaterialProperties::getMassCoefficient(
@@ -125,17 +103,5 @@ Eigen::MatrixXd const& LiquidFlowMaterialProperties::getPermeability(
                                                                  0.0);
 }
 
-double LiquidFlowMaterialProperties::getSolidThermalExpansion(
-    const double t, const SpatialPosition& pos) const
-{
-    return _solid_thermal_expansion(t, pos)[0];
-}
-
-double LiquidFlowMaterialProperties::getBiotConstant(
-    const double t, const SpatialPosition& pos) const
-{
-    return _biot_constant(t, pos)[0];
-}
-
 }  // end of namespace
 }  // end of namespace
diff --git a/ProcessLib/LiquidFlow/LiquidFlowMaterialProperties.h b/ProcessLib/LiquidFlow/LiquidFlowMaterialProperties.h
index 8145d0bd743c02a0c0e572b312d1bc204ac70393..55b21322cb1c940e1f3bb4c2bf859cdb92617c3d 100644
--- a/ProcessLib/LiquidFlow/LiquidFlowMaterialProperties.h
+++ b/ProcessLib/LiquidFlow/LiquidFlowMaterialProperties.h
@@ -19,16 +19,19 @@
 #include "MaterialLib/Fluid/FluidProperty.h"
 #include "MaterialLib/Fluid/FluidProperties/FluidProperties.h"
 
-#include "MaterialLib/PorousMedium/Porosity/Porosity.h"
-#include "MaterialLib/PorousMedium/Permeability/Permeability.h"
-#include "MaterialLib/PorousMedium/Storage/Storage.h"
-
 namespace MaterialLib
 {
 namespace Fluid
 {
 class FluidProperties;
 }
+
+namespace PorousMedium
+{
+class Permeability;
+class Porosity;
+class Storage;
+}
 }
 
 namespace BaseLib
@@ -44,9 +47,6 @@ class PropertyVector;
 
 namespace ProcessLib
 {
-template <typename T>
-struct Parameter;
-
 class SpatialPosition;
 
 namespace LiquidFlow
@@ -69,18 +69,14 @@ public:
         std::vector<std::unique_ptr<MaterialLib::PorousMedium::Storage>>&&
             storage_models,
         bool const has_material_ids,
-        MeshLib::PropertyVector<int> const& material_ids,
-        Parameter<double> const& solid_thermal_expansion,
-        Parameter<double> const& biot_constant)
+        MeshLib::PropertyVector<int> const& material_ids)
         : _has_material_ids(has_material_ids),
           _material_ids(material_ids),
           _fluid_properties(std::move(fluid_properties)),
           _intrinsic_permeability_models(
               std::move(intrinsic_permeability_models)),
           _porosity_models(std::move(porosity_models)),
-          _storage_models(std::move(storage_models)),
-          _solid_thermal_expansion(solid_thermal_expansion),
-          _biot_constant(biot_constant)
+          _storage_models(std::move(storage_models))
     {
     }
 
@@ -115,26 +111,11 @@ public:
 
     double getLiquidDensity(const double p, const double T) const;
 
-    double getdLiquidDensity_dT(const double p, const double T) const;
-
     double getViscosity(const double p, const double T) const;
 
-    double getHeatCapacity(const double p, const double T) const;
-
-    double getThermalConductivity(const double p, const double T) const;
-
     double getPorosity(const int material_id, const double t,
                        const SpatialPosition& pos,
-                       const double porosity_variable, const double T) const
-    {
-        return _porosity_models[material_id]->getValue(t, pos,
-                                                       porosity_variable, T);
-    }
-
-    double getSolidThermalExpansion(const double t,
-                                    const SpatialPosition& pos) const;
-
-    double getBiotConstant(const double t, const SpatialPosition& pos) const;
+                       const double porosity_variable, const double T) const;
 
 private:
     /// A flag to indicate whether the reference member, _material_ids,
@@ -155,8 +136,6 @@ private:
     const std::vector<std::unique_ptr<MaterialLib::PorousMedium::Storage>>
         _storage_models;
 
-    Parameter<double> const& _solid_thermal_expansion;
-    Parameter<double> const& _biot_constant;
     // Note: For the statistical data of porous media, they could be read from
     // vtu files directly. This can be done by using property vectors directly.
     // Such property vectors will be added here if they are needed.
diff --git a/ProcessLib/LiquidFlow/LiquidFlowProcess.cpp b/ProcessLib/LiquidFlow/LiquidFlowProcess.cpp
index 2d2d07408011da89fe66ebecf77a87ee57ac2d27..f55c2db7b92ee15bc661d3b1f40689f9db692360 100644
--- a/ProcessLib/LiquidFlow/LiquidFlowProcess.cpp
+++ b/ProcessLib/LiquidFlow/LiquidFlowProcess.cpp
@@ -20,6 +20,7 @@
 #include "LiquidFlowLocalAssembler.h"
 #include "LiquidFlowMaterialProperties.h"
 
+#include "MaterialLib/PorousMedium/Permeability/Permeability.h"
 #include "MaterialLib/PorousMedium/Porosity/Porosity.h"
 #include "MaterialLib/PorousMedium/Storage/Storage.h"
 
@@ -34,7 +35,8 @@ LiquidFlowProcess::LiquidFlowProcess(
     std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller,
     MeshLib::PropertyVector<int> const& material_ids,
@@ -66,7 +68,7 @@ void LiquidFlowProcess::initializeConcreteProcess(
         pv.getShapeFunctionOrder(), _local_assemblers,
         mesh.isAxiallySymmetric(), integration_order, _gravitational_axis_id,
         _gravitational_acceleration, _reference_temperature,
-        *_material_properties, _coupled_solutions);
+        *_material_properties);
 
     _secondary_variables.addSecondaryVariable(
         "darcy_velocity",
@@ -112,14 +114,5 @@ void LiquidFlowProcess::computeSecondaryVariableConcrete(const double t,
         _coupled_solutions);
 }
 
-void LiquidFlowProcess::setCoupledSolutionsForStaggeredSchemeToLocalAssemblers()
-{
-    DBUG("Compute the velocity for LiquidFlowProcess.");
-    GlobalExecutor::executeMemberOnDereferenced(
-        &LiquidFlowLocalAssemblerInterface::
-            setCoupledSolutionsForStaggeredScheme,
-        _local_assemblers, _coupled_solutions);
-}
-
 }  // end of namespace
 }  // end of namespace
diff --git a/ProcessLib/LiquidFlow/LiquidFlowProcess.h b/ProcessLib/LiquidFlow/LiquidFlowProcess.h
index 1e6fca223ef38fe7debdab056aa514a41506c2a5..4d4879254fecd37df5adef922754ce74c61c0c66 100644
--- a/ProcessLib/LiquidFlow/LiquidFlowProcess.h
+++ b/ProcessLib/LiquidFlow/LiquidFlowProcess.h
@@ -62,7 +62,7 @@ public:
         std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         SecondaryVariableCollection&& secondary_variables,
         NumLib::NamedFunctionCaller&& named_function_caller,
@@ -83,13 +83,6 @@ public:
         return _gravitational_acceleration;
     }
 
-    LiquidFlowMaterialProperties* getLiquidFlowMaterialProperties() const
-    {
-        return _material_properties.get();
-    }
-
-    void setCoupledSolutionsForStaggeredSchemeToLocalAssemblers() override;
-
 private:
     void initializeConcreteProcess(
         NumLib::LocalToGlobalIndexMap const& dof_table,
diff --git a/ProcessLib/LiquidFlow/Tests.cmake b/ProcessLib/LiquidFlow/Tests.cmake
index 544ed2a5a2b998d7f4e6fe3c48d7a9e61527af6f..dafcce389bb3dd7d84f415847afbfb7054ac19d8 100644
--- a/ProcessLib/LiquidFlow/Tests.cmake
+++ b/ProcessLib/LiquidFlow/Tests.cmake
@@ -70,61 +70,6 @@ AddTest(
     hex.vtu isotropic_gravity_driven3D_pcs_0_ts_1_t_1.000000.vtu analytic_pressure pressure 1e-6 1e-6
 )
 
-# Coupling
-AddTest(
-    NAME StaggeredTH_ThermalDensityDrivenFlow2D
-    PATH StaggeredCoupledProcesses/TH/ThermalDensityDrivenFlow2D
-    EXECUTABLE ogs
-    EXECUTABLE_ARGS thermal_gravity_driven_flow2d.prj
-    WRAPPER time
-    TESTER vtkdiff
-    REQUIREMENTS NOT OGS_USE_MPI
-    DIFF_DATA
-    mesh2D.vtu gravity_driven_pcs_1_ts_10_t_300.000000.vtu OGS5_PRESSURE1 pressure 1.1 1e-7
-    mesh2D.vtu gravity_driven_pcs_1_ts_10_t_300.000000.vtu OGS5_TEMPERATURE1 temperature 1.1 1e-7
-    mesh2D.vtu gravity_driven_pcs_1_ts_10_t_300.000000.vtu v_ref v 1.1 1e-7
-)
-
-AddTest(
-    NAME Adaptive_dt_StaggeredTH_ThermalDensityDrivenFlow2D
-    PATH StaggeredCoupledProcesses/TH/ThermalDensityDrivenFlow2D
-    EXECUTABLE ogs
-    EXECUTABLE_ARGS thermal_gravity_driven_flow2d_adaptive_dt.prj
-    WRAPPER time
-    TESTER vtkdiff
-    REQUIREMENTS NOT OGS_USE_MPI
-    DIFF_DATA
-    mesh2D.vtu gravity_driven_adaptive_dt_pcs_1_ts_10_t_300.000000.vtu OGS5_PRESSURE1 pressure 1.1 1e-6
-    mesh2D.vtu gravity_driven_adaptive_dt_pcs_1_ts_10_t_300.000000.vtu OGS5_TEMPERATURE1 temperature 1.1 1e-6
-    mesh2D.vtu gravity_driven_adaptive_dt_pcs_1_ts_10_t_300.000000.vtu v_ref v 1.1 1e-6
-)
-
-AddTest(
-    NAME Adaptive_dt_ThermalConvectionFlow2D
-    PATH StaggeredCoupledProcesses/TH/ThermalConvection2D
-    EXECUTABLE ogs
-    EXECUTABLE_ARGS quad_5500x5500_adaptive_dt.prj
-    WRAPPER time
-    TESTER vtkdiff
-    REQUIREMENTS NOT OGS_USE_MPI
-    DIFF_DATA
-    ThermalConvection_pcs_1_ts_232_t_50000000000.000000_non_const_mu.vtu  ThermalConvection_pcs_1_ts_223_t_50000000000.000000.vtu  pressure pressure 1.e-3 1e-10
-    ThermalConvection_pcs_1_ts_232_t_50000000000.000000_non_const_mu.vtu  ThermalConvection_pcs_1_ts_223_t_50000000000.000000.vtu  temperature temperature 1.e-3 1e-10
-)
-
-AddTest(
-    NAME Adaptive_dt_ThermalConvectionFlow2D_Constant_Viscosity
-    PATH StaggeredCoupledProcesses/TH/ThermalConvection2D
-    EXECUTABLE ogs
-    EXECUTABLE_ARGS quad_5500x5500_adaptive_dt_constant_viscosity.prj
-    WRAPPER time
-    TESTER vtkdiff
-    REQUIREMENTS NOT OGS_USE_MPI
-    DIFF_DATA
-    ConstViscosityThermalConvection_pcs_1_ts_137_t_50000000000.000000.vtu  ConstViscosityThermalConvection_pcs_1_ts_137_t_50000000000.000000.vtu  pressure pressure 1.e-16 1e-9
-    ConstViscosityThermalConvection_pcs_1_ts_137_t_50000000000.000000.vtu  ConstViscosityThermalConvection_pcs_1_ts_137_t_50000000000.000000.vtu  temperature temperature 1.e-16 1e-9
-)
-
 #===============================================================================
 # PETSc/MPI
 AddTest(
@@ -196,60 +141,3 @@ AddTest(
     DIFF_DATA
     hex.vtu isotropic_gravity_driven3D_pcs_0_ts_1_t_1_000000_0.vtu analytic_pressure pressure 1e-6 1e-6
 )
-
-# Coupling
-AddTest(
-    NAME StaggeredTH_ThermalDensityDrivenFlow2D
-    PATH StaggeredCoupledProcesses/TH/ThermalDensityDrivenFlow2D
-    EXECUTABLE_ARGS thermal_gravity_driven_flow2d.prj
-    WRAPPER mpirun
-    WRAPPER_ARGS -np 1
-    TESTER vtkdiff
-    REQUIREMENTS OGS_USE_MPI
-    DIFF_DATA
-    mesh2D.vtu gravity_driven_pcs_1_ts_10_t_300_000000_0.vtu OGS5_PRESSURE1  pressure 1.1 1e-7
-    mesh2D.vtu gravity_driven_pcs_1_ts_10_t_300_000000_0.vtu OGS5_TEMPERATURE1 temperature 1.1 1e-7
-# To be activated when the output of velocity is correct under PETSc version.
-#    mesh2D.vtu gravity_driven_pcs_1_ts_10_t_300_000000_0.vtu v_ref v 1.1 1e-7
-
-)
-
-AddTest(
-    NAME Adaptive_dt_StaggeredTH_ThermalDensityDrivenFlow2D
-    PATH StaggeredCoupledProcesses/TH/ThermalDensityDrivenFlow2D
-    EXECUTABLE_ARGS thermal_gravity_driven_flow2d_adaptive_dt.prj
-    WRAPPER mpirun
-    WRAPPER_ARGS -np 1
-    TESTER vtkdiff
-    REQUIREMENTS OGS_USE_MPI
-    DIFF_DATA
-    mesh2D.vtu gravity_driven_adaptive_dt_pcs_1_ts_10_t_300_000000_0.vtu OGS5_PRESSURE1 pressure 1.1 1e-7
-    mesh2D.vtu gravity_driven_adaptive_dt_pcs_1_ts_10_t_300_000000_0.vtu OGS5_TEMPERATURE1 temperature 1.1 1e-7
-# To be activated when the output of velocity is correct under PETSc version.
-#    mesh2D.vtu gravity_driven_pcs_1_ts_5_t_300_000000_0.vtu v_ref v 1.1 1e-7
-)
-AddTest(
-    NAME Adaptive_dt_ThermalConvectionFlow2D
-    PATH StaggeredCoupledProcesses/TH/ThermalConvection2D
-    EXECUTABLE_ARGS quad_5500x5500_adaptive_dt.prj
-    WRAPPER mpirun
-    WRAPPER_ARGS -np 1
-    TESTER vtkdiff
-    REQUIREMENTS OGS_USE_MPI
-    DIFF_DATA
-    ThermalConvection_pcs_1_ts_232_t_50000000000.000000_non_const_mu.vtu  ThermalConvection_pcs_1_ts_223_t_50000000000_000000_0.vtu  pressure pressure 1.e-3 1e-10
-    ThermalConvection_pcs_1_ts_232_t_50000000000.000000_non_const_mu.vtu  ThermalConvection_pcs_1_ts_223_t_50000000000_000000_0.vtu  temperature temperature 1.e-3 1e-10
-)
-
-AddTest(
-    NAME Adaptive_dt_ThermalConvectionFlow2D_Constant_Viscosity
-    PATH StaggeredCoupledProcesses/TH/ThermalConvection2D
-    EXECUTABLE_ARGS quad_5500x5500_adaptive_dt_constant_viscosity.prj
-    WRAPPER mpirun
-    WRAPPER_ARGS -np 1
-    TESTER vtkdiff
-    REQUIREMENTS OGS_USE_MPI
-    DIFF_DATA
-    ConstViscosityThermalConvection_pcs_1_ts_137_t_50000000000.000000.vtu  ConstViscosityThermalConvection_pcs_1_ts_137_t_50000000000_000000_0.vtu  pressure pressure 1.e-9 1e-9
-    ConstViscosityThermalConvection_pcs_1_ts_137_t_50000000000.000000.vtu  ConstViscosityThermalConvection_pcs_1_ts_137_t_50000000000_000000_0.vtu  temperature temperature 1.e-9 1e-9
-)
diff --git a/ProcessLib/LocalAssemblerInterface.cpp b/ProcessLib/LocalAssemblerInterface.cpp
index 910a73b311a4ef1b007c3797ca31b3b65df283bc..08ec6e57e9e8ee4290a1e245856b1b472d771699 100644
--- a/ProcessLib/LocalAssemblerInterface.cpp
+++ b/ProcessLib/LocalAssemblerInterface.cpp
@@ -15,8 +15,18 @@
 
 namespace ProcessLib
 {
+void LocalAssemblerInterface::assemble(double const /*t*/,
+                                       std::vector<double> const& /*local_x*/,
+                                       std::vector<double>& /*local_M_data*/,
+                                       std::vector<double>& /*local_K_data*/,
+                                       std::vector<double>& /*local_b_data*/)
+{
+    OGS_FATAL(
+        "The assemble() function is not implemented in the local assembler.");
+}
+
 void LocalAssemblerInterface::assembleWithCoupledTerm(
-    double const /*t*/, std::vector<double> const& /*local_x*/,
+    double const /*t*/,
     std::vector<double>& /*local_M_data*/,
     std::vector<double>& /*local_K_data*/,
     std::vector<double>& /*local_b_data*/,
@@ -41,13 +51,13 @@ void LocalAssemblerInterface::assembleWithJacobian(
 }
 
 void LocalAssemblerInterface::assembleWithJacobianAndCoupling(
-    double const /*t*/, std::vector<double> const& /*local_x*/,
-    std::vector<double> const& /*local_xdot*/, const double /*dxdot_dx*/,
-    const double /*dx_dx*/, std::vector<double>& /*local_M_data*/,
+    double const /*t*/, std::vector<double> const& /*local_xdot*/,
+    const double /*dxdot_dx*/, const double /*dx_dx*/,
+    std::vector<double>& /*local_M_data*/,
     std::vector<double>& /*local_K_data*/,
     std::vector<double>& /*local_b_data*/,
     std::vector<double>& /*local_Jac_data*/,
-    LocalCoupledSolutions const& /*coupled_solutions*/)
+    LocalCoupledSolutions const& /*local_coupled_solutions*/)
 {
     OGS_FATAL(
         "The assembleWithJacobianAndCoupling() function is not implemented in"
@@ -57,26 +67,20 @@ void LocalAssemblerInterface::assembleWithJacobianAndCoupling(
 void LocalAssemblerInterface::computeSecondaryVariable(
     std::size_t const mesh_item_id,
     NumLib::LocalToGlobalIndexMap const& dof_table, double const t,
-    GlobalVector const& x,
-    CoupledSolutionsForStaggeredScheme const* coupled_term)
+    GlobalVector const& x, CoupledSolutionsForStaggeredScheme const* coupled_xs)
 {
     auto const indices = NumLib::getIndices(mesh_item_id, dof_table);
-    auto const local_x = x.get(indices);
 
-    if (!coupled_term)
+    if (coupled_xs == nullptr)
     {
+        auto const local_x = x.get(indices);
         computeSecondaryVariableConcrete(t, local_x);
     }
     else
     {
         auto const local_coupled_xs =
-            getCurrentLocalSolutionsOfCoupledProcesses(coupled_term->coupled_xs,
-                                                       indices);
-        if (!local_coupled_xs.empty())
-            computeSecondaryVariableWithCoupledProcessConcrete(
-                t, local_x, local_coupled_xs);
-        else
-            computeSecondaryVariableConcrete(t, local_x);
+            getCurrentLocalSolutions(*coupled_xs, indices);
+        computeSecondaryVariableWithCoupledProcessConcrete(t, local_coupled_xs);
     }
 }
 
diff --git a/ProcessLib/LocalAssemblerInterface.h b/ProcessLib/LocalAssemblerInterface.h
index e3fc5cb7983663113db12b0db010403dcd158563..6769abcaa403406f10155f44bce7383be9c90c7e 100644
--- a/ProcessLib/LocalAssemblerInterface.h
+++ b/ProcessLib/LocalAssemblerInterface.h
@@ -9,9 +9,6 @@
 
 #pragma once
 
-#include <unordered_map>
-#include <typeindex>
-
 #include "NumLib/NumericsConfig.h"
 #include "MathLib/Point3d.h"
 
@@ -41,11 +38,10 @@ public:
     virtual void assemble(double const t, std::vector<double> const& local_x,
                           std::vector<double>& local_M_data,
                           std::vector<double>& local_K_data,
-                          std::vector<double>& local_b_data) = 0;
+                          std::vector<double>& local_b_data);
 
     virtual void assembleWithCoupledTerm(
         double const t,
-        std::vector<double> const& local_x,
         std::vector<double>& local_M_data,
         std::vector<double>& local_K_data,
         std::vector<double>& local_b_data,
@@ -61,18 +57,17 @@ public:
                                       std::vector<double>& local_Jac_data);
 
     virtual void assembleWithJacobianAndCoupling(
-        double const t, std::vector<double> const& local_x,
-        std::vector<double> const& local_xdot, const double dxdot_dx,
-        const double dx_dx, std::vector<double>& local_M_data,
-        std::vector<double>& local_K_data, std::vector<double>& local_b_data,
-        std::vector<double>& local_Jac_data,
-        LocalCoupledSolutions const& coupled_solutions);
+        double const t, std::vector<double> const& local_xdot,
+        const double dxdot_dx, const double dx_dx,
+        std::vector<double>& local_M_data, std::vector<double>& local_K_data,
+        std::vector<double>& local_b_data, std::vector<double>& local_Jac_data,
+        LocalCoupledSolutions const& local_coupled_solutions);
 
     virtual void computeSecondaryVariable(
         std::size_t const mesh_item_id,
         NumLib::LocalToGlobalIndexMap const& dof_table, const double t,
         GlobalVector const& x,
-        CoupledSolutionsForStaggeredScheme const* coupled_term);
+        CoupledSolutionsForStaggeredScheme const* coupled_xs);
 
     virtual void preTimestep(std::size_t const mesh_item_id,
                              NumLib::LocalToGlobalIndexMap const& dof_table,
@@ -105,8 +100,7 @@ private:
     }
 
     virtual void computeSecondaryVariableWithCoupledProcessConcrete(
-        double const /*t*/, std::vector<double> const& /*local_x*/,
-        std::unordered_map<std::type_index, const std::vector<double>> const&
+        double const /*t*/, std::vector<std::vector<double>> const&
         /*coupled_local_solutions*/)
     {
     }
diff --git a/ProcessLib/Output.cpp b/ProcessLib/Output.cpp
index b86044b88c7f511ce8155c54510d4b71a1244de5..3efece6337f94aea583d518295633b8ff3743827 100644
--- a/ProcessLib/Output.cpp
+++ b/ProcessLib/Output.cpp
@@ -47,17 +47,23 @@ bool shallDoOutput(unsigned timestep, CountsSteps const& repeats_each_steps)
 int convertVtkDataMode(std::string const& data_mode)
 {
     if (data_mode == "Ascii")
+    {
         return 0;
+    }
     if (data_mode == "Binary")
+    {
         return 1;
+    }
     if (data_mode == "Appended")
+    {
         return 2;
+    }
     OGS_FATAL(
         "Unsupported vtk output file data mode \"%s\". Expected Ascii, "
         "Binary, or Appended.",
         data_mode.c_str());
 }
-}
+}  // namespace
 
 namespace ProcessLib
 {
@@ -117,16 +123,48 @@ newInstance(const BaseLib::ConfigTree &config, std::string const& output_directo
     return out;
 }
 
-void Output::addProcess(ProcessLib::Process const& process, const unsigned pcs_idx)
+void Output::addProcess(ProcessLib::Process const& process,
+                        const int process_id)
 {
     auto const filename =
-        BaseLib::joinPaths(_output_directory, _output_file_prefix + "_pcs_" + std::to_string(pcs_idx) + ".pvd");
+        BaseLib::joinPaths(_output_directory, _output_file_prefix + "_pcs_" +
+                            std::to_string(process_id) + ".pvd");
     _single_process_data.emplace(std::piecewise_construct,
                                  std::forward_as_tuple(&process),
-                                 std::forward_as_tuple(pcs_idx, filename));
+                                 std::forward_as_tuple(filename));
 }
 
+Output::SingleProcessData Output::findSingleProcessData(
+    Process const& process, const unsigned process_id) const
+{
+    if (process.isMonolithicSchemeUsed())
+    {
+        auto spd_it = _single_process_data.find(&process);
+        if (spd_it == _single_process_data.end()) {
+            OGS_FATAL("The given process is not contained in the output"
+                      " configuration. Aborting.");
+        }
+        return spd_it->second;
+    }
+
+    auto spd_range = _single_process_data.equal_range(&process);
+    unsigned counter = 0;
+    for (auto spd_it=spd_range.first; spd_it!=spd_range.second; ++spd_it)
+    {
+        if(counter == process_id)
+        {
+            return spd_it->second;
+        }
+        counter++;
+    }
+
+    OGS_FATAL("The given process is not contained in the output"
+                      " configuration. Aborting.");
+}
+
+
 void Output::doOutputAlways(Process const& process,
+                            const int process_id,
                             ProcessOutput const& process_output,
                             unsigned timestep,
                             const double t,
@@ -135,38 +173,46 @@ void Output::doOutputAlways(Process const& process,
     BaseLib::RunTime time_output;
     time_output.start();
 
-    auto spd_it = _single_process_data.find(&process);
-    if (spd_it == _single_process_data.end()) {
-        OGS_FATAL("The given process is not contained in the output configuration."
-            " Aborting.");
-    }
-    auto& spd = spd_it->second;
+    auto spd = findSingleProcessData(process, process_id);
 
     std::string const output_file_name =
-            _output_file_prefix + "_pcs_" + std::to_string(spd.process_index)
+            _output_file_prefix + "_pcs_" + std::to_string(process_id)
             + "_ts_" + std::to_string(timestep)
             + "_t_"  + std::to_string(t)
             + ".vtu";
     std::string const output_file_path = BaseLib::joinPaths(_output_directory, output_file_name);
-    DBUG("output to %s", output_file_path.c_str());
-    doProcessOutput(output_file_path, _output_file_compression,
+
+    const bool make_out = !(process_id < _single_process_data.size() - 1 &&
+                            !(process.isMonolithicSchemeUsed()));
+
+    if (make_out)
+        DBUG("output to %s", output_file_path.c_str());
+
+    doProcessOutput(output_file_path, make_out, _output_file_compression,
                     _output_file_data_mode, t, x, process.getMesh(),
                     process.getDOFTable(), process.getProcessVariables(),
                     process.getSecondaryVariables(), process_output);
-    spd.pvd_file.addVTUFile(output_file_name, t);
 
-    INFO("[time] Output of timestep %d took %g s.", timestep,
-         time_output.elapsed());
+    if (make_out)
+    {
+        spd.pvd_file.addVTUFile(output_file_name, t);
+
+        INFO("[time] Output of timestep %d took %g s.", timestep,
+             time_output.elapsed());
+    }
 }
 
 void Output::doOutput(Process const& process,
+                      const int process_id,
                       ProcessOutput const& process_output,
                       unsigned timestep,
                       const double t,
                       GlobalVector const& x)
 {
     if (shallDoOutput(timestep, _repeats_each_steps))
-        doOutputAlways(process, process_output, timestep, t, x);
+    {
+        doOutputAlways(process, process_id, process_output, timestep, t, x);
+    }
 #ifdef USE_INSITU
     // Note: last time step may be output twice: here and in
     // doOutputLastTimestep() which throws a warning.
@@ -175,50 +221,55 @@ void Output::doOutput(Process const& process,
 }
 
 void Output::doOutputLastTimestep(Process const& process,
+                                  const int process_id,
                                   ProcessOutput const& process_output,
                                   unsigned timestep,
                                   const double t,
                                   GlobalVector const& x)
 {
     if (!shallDoOutput(timestep, _repeats_each_steps))
-        doOutputAlways(process, process_output, timestep, t, x);
+    {
+        doOutputAlways(process, process_id, process_output, timestep, t, x);
+    }
 #ifdef USE_INSITU
     InSituLib::CoProcess(process.getMesh(), t, timestep, true);
 #endif
 }
 
 void Output::doOutputNonlinearIteration(Process const& process,
+                                        const int process_id,
                                         ProcessOutput const& process_output,
                                         const unsigned timestep, const double t,
                                         GlobalVector const& x,
                                         const unsigned iteration) const
 {
-    if (!_output_nonlinear_iteration_results) return;
+    if (!_output_nonlinear_iteration_results)
+    {
+        return;
+    }
 
     BaseLib::RunTime time_output;
     time_output.start();
 
-    auto spd_it = _single_process_data.find(&process);
-    if (spd_it == _single_process_data.end()) {
-        OGS_FATAL("The given process is not contained in the output configuration."
-            " Aborting.");
-    }
-    auto& spd = spd_it->second;
+    findSingleProcessData(process, process_id);
 
     std::string const output_file_name =
-            _output_file_prefix + "_pcs_" + std::to_string(spd.process_index)
+            _output_file_prefix + "_pcs_" + std::to_string(process_id)
             + "_ts_" + std::to_string(timestep)
             + "_t_"  + std::to_string(t)
             + "_nliter_" + std::to_string(iteration)
             + ".vtu";
     std::string const output_file_path = BaseLib::joinPaths(_output_directory, output_file_name);
     DBUG("output iteration results to %s", output_file_path.c_str());
-    doProcessOutput(output_file_path, _output_file_compression,
+    const bool make_out = !(process_id < _single_process_data.size() - 1 &&
+                            !(process.isMonolithicSchemeUsed()));
+    doProcessOutput(output_file_path, make_out, _output_file_compression,
                     _output_file_data_mode, t, x, process.getMesh(),
                     process.getDOFTable(), process.getProcessVariables(),
                     process.getSecondaryVariables(), process_output);
 
-    INFO("[time] Output took %g s.", time_output.elapsed());
+    if (make_out)
+        INFO("[time] Output took %g s.", time_output.elapsed());
 }
 
-}
+}  // namespace ProcessLib
diff --git a/ProcessLib/Output.h b/ProcessLib/Output.h
index f9d9bd15d009f2549f7f76e0ecf0781f5ac3a32b..71dc38a656f60c3db17a687b806890fac3f7a705 100644
--- a/ProcessLib/Output.h
+++ b/ProcessLib/Output.h
@@ -10,6 +10,7 @@
 #pragma once
 
 #include <utility>
+#include <map>
 
 #include "BaseLib/ConfigTree.h"
 #include "MeshLib/IO/VtkIO/PVDFile.h"
@@ -32,17 +33,19 @@ public:
                 const std::string& output_directory);
 
     //! TODO doc. Opens a PVD file for each process.
-    void addProcess(ProcessLib::Process const& process, const unsigned pcs_idx);
+    void addProcess(ProcessLib::Process const& process, const int process_id);
 
     //! Writes output for the given \c process if it should be written in the
     //! given \c timestep.
-    void doOutput(Process const& process, ProcessOutput const& process_output,
+    void doOutput(Process const& process, const int process_id,
+                  ProcessOutput const& process_output,
                   unsigned timestep, const double t, GlobalVector const& x);
 
     //! Writes output for the given \c process if it has not been written yet.
     //! This method is intended for doing output after the last timestep in order
     //! to make sure that its results are written.
     void doOutputLastTimestep(Process const& process,
+                              const int process_id,
                               ProcessOutput const& process_output,
                               unsigned timestep, const double t,
                               GlobalVector const& x);
@@ -50,13 +53,14 @@ public:
     //! Writes output for the given \c process.
     //! This method will always write.
     //! It is intended to write output in error handling routines.
-    void doOutputAlways(Process const& process,
+    void doOutputAlways(Process const& process, const int process_id,
                         ProcessOutput const& process_output, unsigned timestep,
                         const double t, GlobalVector const& x);
 
     //! Writes output for the given \c process.
     //! To be used for debug output after an iteration of the nonlinear solver.
     void doOutputNonlinearIteration(Process const& process,
+                                    const int process_id,
                                     ProcessOutput const& process_output,
                                     const unsigned timestep, const double t,
                                     GlobalVector const& x,
@@ -71,16 +75,14 @@ public:
         const unsigned repeat;     //!< Apply \c each_steps \c repeat times.
         const unsigned each_steps; //!< Do output every \c each_steps timestep.
     };
+
 private:
     struct SingleProcessData
     {
-        SingleProcessData(unsigned process_index_,
-                          std::string const& filename)
-            : process_index(process_index_)
-            , pvd_file(filename)
+        SingleProcessData(std::string const& filename)
+            : pvd_file(filename)
         {}
 
-        const unsigned process_index;
         MeshLib::IO::PVDFile pvd_file;
     };
 
@@ -103,7 +105,10 @@ private:
     //! Describes after which timesteps to write output.
     std::vector<PairRepeatEachSteps> _repeats_each_steps;
 
-    std::map<Process const*, SingleProcessData> _single_process_data;
+    std::multimap<Process const*, SingleProcessData> _single_process_data;
+
+    SingleProcessData findSingleProcessData(
+        Process const& process, const unsigned process_id) const;
 };
 
 }
diff --git a/ProcessLib/PhaseField/CreatePhaseFieldProcess.cpp b/ProcessLib/PhaseField/CreatePhaseFieldProcess.cpp
index 38546b7b51e062e3030f244a5ce9ae1a9b541046..b537383a5b35d7a4cc38b9dcdbd470e6bdf6a47a 100644
--- a/ProcessLib/PhaseField/CreatePhaseFieldProcess.cpp
+++ b/ProcessLib/PhaseField/CreatePhaseFieldProcess.cpp
@@ -36,41 +36,68 @@ std::unique_ptr<Process> createPhaseFieldProcess(
     config.checkConfigParameter("type", "PHASE_FIELD");
     DBUG("Create PhaseFieldProcess.");
 
+    auto const staggered_scheme =
+        //! \ogs_file_param{prj__processes__process__PHASE_FIELD__coupling_scheme}
+        config.getConfigParameterOptional<std::string>("coupling_scheme");
+    const bool use_monolithic_scheme =
+        (staggered_scheme && (*staggered_scheme == "staggered")) ? false : true;
+
     // Process variable.
 
     //! \ogs_file_param{prj__processes__process__PHASE_FIELD__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
-        variables, pv_config,
-        {//! \ogs_file_param_special{prj__processes__process__PHASE_FIELD__process_variables__phasefield}
-         "phasefield",
-         //! \ogs_file_param_special{prj__processes__process__PHASE_FIELD__process_variables__displacement}
-         "displacement"});
+    ProcessVariable* variable_ph;
+    ProcessVariable* variable_u;
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    if (use_monolithic_scheme)  // monolithic scheme.
+    {
+        auto per_process_variables = findProcessVariables(
+            variables, pv_config,
+            {//! \ogs_file_param_special{prj__processes__process__PHASE_FIELD__process_variables__phasefield}
+            "phasefield",
+             //! \ogs_file_param_special{prj__processes__process__PHASE_FIELD__process_variables__displacement}
+            "displacement"});
+        variable_ph = &per_process_variables[0].get();
+        variable_u = &per_process_variables[1].get();
+        process_variables.push_back(std::move(per_process_variables));
+    }
+    else  // staggered scheme.
+    {
+        using namespace std::string_literals;
+        for (auto const& variable_name : {"phasefield"s, "displacement"s})
+        {
+            auto per_process_variables =
+                findProcessVariables(variables, pv_config, {variable_name});
+            process_variables.push_back(std::move(per_process_variables));
+        }
+        variable_ph = &process_variables[0][0].get();
+        variable_u = &process_variables[1][0].get();
+    }
 
     DBUG("Associate displacement with process variable \'%s\'.",
-         process_variables[1].get().getName().c_str());
+         variable_u->getName().c_str());
 
-    if (process_variables[1].get().getNumberOfComponents() != DisplacementDim)
+    if (variable_u->getNumberOfComponents() != DisplacementDim)
     {
         OGS_FATAL(
             "Number of components of the process variable '%s' is different "
             "from the displacement dimension: got %d, expected %d",
-            process_variables[1].get().getName().c_str(),
-            process_variables[1].get().getNumberOfComponents(),
+            variable_u->getName().c_str(),
+            variable_u->getNumberOfComponents(),
             DisplacementDim);
     }
 
     DBUG("Associate phase field with process variable \'%s\'.",
-         process_variables[0].get().getName().c_str());
-    if (process_variables[0].get().getNumberOfComponents() != 1)
+         variable_ph->getName().c_str());
+    if (variable_ph->getNumberOfComponents() != 1)
     {
         OGS_FATAL(
-            "Phase field process variable '%s' is not a scalar variable but has "
+            "Pressure process variable '%s' is not a scalar variable but has "
             "%d components.",
-            process_variables[0].get().getName().c_str(),
-            process_variables[0].get().getNumberOfComponents(),
-            DisplacementDim);
+            variable_ph->getName().c_str(),
+            variable_ph->getNumberOfComponents());
     }
 
     // Constitutive relation.
@@ -180,7 +207,8 @@ std::unique_ptr<Process> createPhaseFieldProcess(
     return std::make_unique<PhaseFieldProcess<DisplacementDim>>(
             mesh, std::move(jacobian_assembler), parameters, integration_order,
             std::move(process_variables), std::move(process_data),
-            std::move(secondary_variables), std::move(named_function_caller));
+            std::move(secondary_variables), std::move(named_function_caller),
+            use_monolithic_scheme);
 }
 
 template std::unique_ptr<Process> createPhaseFieldProcess<2>(
diff --git a/ProcessLib/PhaseField/PhaseFieldProcess-impl.h b/ProcessLib/PhaseField/PhaseFieldProcess-impl.h
index bdf2d897645d34c4e0d3a9f34dec27116055ab64..6abc841697fcd3cb8da1451e3c63aa4e8e7c3eb1 100644
--- a/ProcessLib/PhaseField/PhaseFieldProcess-impl.h
+++ b/ProcessLib/PhaseField/PhaseFieldProcess-impl.h
@@ -26,13 +26,16 @@ PhaseFieldProcess<DisplacementDim>::PhaseFieldProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     PhaseFieldProcessData<DisplacementDim>&& process_data,
     SecondaryVariableCollection&& secondary_variables,
-    NumLib::NamedFunctionCaller&& named_function_caller)
+    NumLib::NamedFunctionCaller&& named_function_caller,
+    bool const use_monolithic_scheme)
     : Process(mesh, std::move(jacobian_assembler), parameters,
               integration_order, std::move(process_variables),
-              std::move(secondary_variables), std::move(named_function_caller)),
+              std::move(secondary_variables), std::move(named_function_caller),
+              use_monolithic_scheme),
       _process_data(std::move(process_data))
 {
 }
diff --git a/ProcessLib/PhaseField/PhaseFieldProcess.h b/ProcessLib/PhaseField/PhaseFieldProcess.h
index 3db6e74e9de300625080d94aa8e920ea93fdcbc4..f9c4708afa809c6a82fecdd9da78c1e2da206f5e 100644
--- a/ProcessLib/PhaseField/PhaseFieldProcess.h
+++ b/ProcessLib/PhaseField/PhaseFieldProcess.h
@@ -57,11 +57,12 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         PhaseFieldProcessData<DisplacementDim>&& process_data,
         SecondaryVariableCollection&& secondary_variables,
-        NumLib::NamedFunctionCaller&& named_function_caller);
+        NumLib::NamedFunctionCaller&& named_function_caller,
+        bool const use_monolithic_scheme);
 
     //! \name ODESystem interface
     //! @{
diff --git a/ProcessLib/Process.cpp b/ProcessLib/Process.cpp
index 119e9063dd9e36bdfd5d030ab33c6b01751870be..b1e8ed075ca86c66f7bcf8b16d375d0ab27b392e 100644
--- a/ProcessLib/Process.cpp
+++ b/ProcessLib/Process.cpp
@@ -24,17 +24,29 @@ Process::Process(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     SecondaryVariableCollection&& secondary_variables,
-    NumLib::NamedFunctionCaller&& named_function_caller)
+    NumLib::NamedFunctionCaller&& named_function_caller,
+    const bool use_monolithic_scheme)
     : _mesh(mesh),
       _secondary_variables(std::move(secondary_variables)),
       _named_function_caller(std::move(named_function_caller)),
       _global_assembler(std::move(jacobian_assembler)),
+      _use_monolithic_scheme(use_monolithic_scheme),
       _coupled_solutions(nullptr),
       _integration_order(integration_order),
       _process_variables(std::move(process_variables)),
-      _boundary_conditions(parameters)
+      _boundary_conditions([&](const std::size_t number_of_processes)
+                               -> std::vector<BoundaryConditionCollection> {
+          std::vector<BoundaryConditionCollection> pcs_BCs;
+          pcs_BCs.reserve(number_of_processes);
+          for (std::size_t i = 0; i < number_of_processes; i++)
+          {
+              pcs_BCs.emplace_back(BoundaryConditionCollection(parameters));
+          }
+          return pcs_BCs;
+      }(_process_variables.size()))
 {
 }
 
@@ -57,40 +69,52 @@ void Process::initialize()
     finishNamedFunctionsInitialization();
 
     DBUG("Initialize boundary conditions.");
-    _boundary_conditions.addBCsForProcessVariables(
-        _process_variables, *_local_to_global_index_map, _integration_order);
-
-    for (int variable_id = 0;
-         variable_id < static_cast<int>(_process_variables.size());
-         ++variable_id)
+    for (std::size_t pcs_id = 0; pcs_id < _process_variables.size(); pcs_id++)
     {
-        ProcessVariable& pv = _process_variables[variable_id];
-        auto sts = pv.createSourceTerms(*_local_to_global_index_map,
-                                        variable_id, _integration_order);
+        auto const& per_process_variables = _process_variables[pcs_id];
+        auto& per_process_BCs = _boundary_conditions[pcs_id];
+
+        per_process_BCs.addBCsForProcessVariables(per_process_variables,
+                                                  *_local_to_global_index_map,
+                                                  _integration_order);
+
+        std::vector<std::unique_ptr<NodalSourceTerm>> per_process_source_terms;
+        for (auto& pv : per_process_variables)
+        {
+            auto sts = pv.get().createSourceTerms(*_local_to_global_index_map,
+                                                  0, _integration_order);
 
-        std::move(sts.begin(), sts.end(), std::back_inserter(_source_terms));
+            std::move(sts.begin(), sts.end(),
+                      std::back_inserter(per_process_source_terms));
+        }
+        _source_terms.push_back(std::move(per_process_source_terms));
     }
 }
 
-void Process::setInitialConditions(double const t, GlobalVector& x)
+void Process::setInitialConditions(const unsigned pcs_id, double const t,
+                                   GlobalVector& x)
 {
-    DBUG("Set initial conditions.");
+    auto const& per_process_variables = _process_variables[pcs_id];
+    for (std::size_t variable_id = 0;
+         variable_id < per_process_variables.size();
+         variable_id++)
+    {
+        SpatialPosition pos;
 
-    SpatialPosition pos;
+        auto const& pv = per_process_variables[variable_id];
+        DBUG("Set the initial condition of variable %s of process %d.",
+             pv.get().getName().data(), pcs_id);
 
-    for (int variable_id = 0;
-         variable_id < static_cast<int>(_process_variables.size());
-         ++variable_id)
-    {
-        ProcessVariable& pv = _process_variables[variable_id];
-        auto const& ic = pv.getInitialCondition();
+        auto const& ic = pv.get().getInitialCondition();
+
+        auto const num_comp = pv.get().getNumberOfComponents();
 
-        auto const num_comp = pv.getNumberOfComponents();
+        const int mesh_subset_id = _use_monolithic_scheme ? variable_id : 0;
 
         for (int component_id = 0; component_id < num_comp; ++component_id)
         {
             auto const& mesh_subsets =
-                _local_to_global_index_map->getMeshSubsets(variable_id,
+                _local_to_global_index_map->getMeshSubsets(mesh_subset_id,
                                                            component_id);
             for (auto const& mesh_subset : mesh_subsets)
             {
@@ -105,7 +129,7 @@ void Process::setInitialConditions(double const t, GlobalVector& x)
 
                     auto global_index =
                         std::abs(_local_to_global_index_map->getGlobalIndex(
-                            l, variable_id, component_id));
+                            l, mesh_subset_id, component_id));
 #ifdef USE_PETSC
                     // The global indices of the ghost entries of the global
                     // matrix or the global vectors need to be set as negative
@@ -144,9 +168,12 @@ void Process::assemble(const double t, GlobalVector const& x, GlobalMatrix& M,
 
     assembleConcreteProcess(t, x, M, K, b);
 
-    _boundary_conditions.applyNaturalBC(t, x, K, b);
+    const auto pcs_id =
+        (_coupled_solutions) != nullptr ? _coupled_solutions->process_id : 0;
+    _boundary_conditions[pcs_id].applyNaturalBC(t, x, K, b);
 
-    for (auto const& st : _source_terms)
+    auto& source_terms_per_pcs = _source_terms[pcs_id];
+    for (auto& st : source_terms_per_pcs)
     {
         st->integrateNodalSourceTerm(t, b);
     }
@@ -164,8 +191,10 @@ void Process::assembleWithJacobian(const double t, GlobalVector const& x,
     assembleWithJacobianConcreteProcess(t, x, xdot, dxdot_dx, dx_dx, M, K, b,
                                         Jac);
 
-    // TODO apply BCs to Jacobian.
-    _boundary_conditions.applyNaturalBC(t, x, K, b);
+    // TODO: apply BCs to Jacobian.
+    const auto pcs_id =
+        (_coupled_solutions) != nullptr ? _coupled_solutions->process_id : 0;
+    _boundary_conditions[pcs_id].applyNaturalBC(t, x, K, b);
 }
 
 void Process::constructDofTable()
@@ -174,23 +203,48 @@ void Process::constructDofTable()
     _mesh_subset_all_nodes =
         std::make_unique<MeshLib::MeshSubset>(_mesh, &_mesh.getNodes());
 
-    // Collect the mesh subsets in a vector.
+    // Vector of mesh subsets.
     std::vector<MeshLib::MeshSubsets> all_mesh_subsets;
-    for (ProcessVariable const& pv : _process_variables)
+
+    // Vector of the number of variable components
+    std::vector<int> vec_var_n_components;
+    if (_use_monolithic_scheme)
+    {
+        // Collect the mesh subsets in a vector.
+        for (ProcessVariable const& pv : _process_variables[0])
+        {
+            std::generate_n(
+                std::back_inserter(all_mesh_subsets),
+                pv.getNumberOfComponents(),
+                [&]() {
+                    return MeshLib::MeshSubsets{_mesh_subset_all_nodes.get()};
+                });
+        }
+
+        // Create a vector of the number of variable components
+        for (ProcessVariable const& pv : _process_variables[0])
+        {
+            vec_var_n_components.push_back(pv.getNumberOfComponents());
+        }
+    }
+    else  // for staggered scheme
     {
+        // Assuming that all equations of the coupled process use the same
+        // element order. Other cases can be considered by overloading this
+        // member function in the derived class.
+
+        // Collect the mesh subsets in a vector.
         std::generate_n(
             std::back_inserter(all_mesh_subsets),
-            pv.getNumberOfComponents(),
+            _process_variables[0][0].get().getNumberOfComponents(),
             [&]() {
                 return MeshLib::MeshSubsets{_mesh_subset_all_nodes.get()};
             });
-    }
-
-    // Create a vector of the number of variable components
-    std::vector<int> vec_var_n_components;
-    for (ProcessVariable const& pv : _process_variables)
-        vec_var_n_components.push_back(pv.getNumberOfComponents());
 
+        // Create a vector of the number of variable components.
+        vec_var_n_components.push_back(
+            _process_variables[0][0].get().getNumberOfComponents());
+    }
     _local_to_global_index_map =
         std::make_unique<NumLib::LocalToGlobalIndexMap>(
             std::move(all_mesh_subsets), vec_var_n_components,
@@ -202,7 +256,8 @@ void Process::initializeExtrapolator()
     NumLib::LocalToGlobalIndexMap const* dof_table_single_component;
     bool manage_storage;
 
-    if (_local_to_global_index_map->getNumberOfComponents() == 1)
+    if (_local_to_global_index_map->getNumberOfComponents() == 1 ||
+        !_use_monolithic_scheme)
     {
         // For single-variable-single-component processes reuse the existing DOF
         // table.
@@ -227,7 +282,7 @@ void Process::initializeExtrapolator()
         new NumLib::LocalLinearLeastSquaresExtrapolator(
             *dof_table_single_component));
 
-    // TODO Later on the DOF table can change during the simulation!
+    // TODO: Later on the DOF table can change during the simulation!
     _extrapolator_data = ExtrapolatorData(
         std::move(extrapolator), dof_table_single_component, manage_storage);
 }
@@ -301,4 +356,27 @@ NumLib::IterationResult Process::postIteration(const GlobalVector& x)
     return postIterationConcreteProcess(x);
 }
 
+void Process::setCoupledSolutionsOfPreviousTimeStep()
+{
+    const auto number_of_coupled_solutions =
+        _coupled_solutions->coupled_xs.size();
+    _coupled_solutions->coupled_xs_t0.reserve(number_of_coupled_solutions);
+    for (std::size_t i = 0; i < number_of_coupled_solutions; i++)
+    {
+        const auto x_t0 = getPreviousTimeStepSolution(i);
+        if (x_t0 == nullptr)
+        {
+            OGS_FATAL(
+                "Memory is not allocated for the global vector "
+                "of the solution of the previous time step for the ."
+                "staggered scheme.\n It can be done by overloading "
+                "Process::preTimestepConcreteProcess"
+                "(ref. HTProcess::preTimestepConcreteProcess) ");
+        }
+
+        MathLib::LinAlg::setLocalAccessibleVector(*x_t0);
+        _coupled_solutions->coupled_xs_t0.emplace_back(x_t0);
+    }
+}
+
 }  // namespace ProcessLib
diff --git a/ProcessLib/Process.h b/ProcessLib/Process.h
index ef2ef4d1b096652af79a874469fe49e9c07e1513..9bdd143265e3279b40ef8885fb513e0336630f97 100644
--- a/ProcessLib/Process.h
+++ b/ProcessLib/Process.h
@@ -46,10 +46,11 @@ public:
             std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
             std::vector<std::unique_ptr<ParameterBase>> const& parameters,
             unsigned const integration_order,
-            std::vector<std::reference_wrapper<ProcessVariable>>&&
+            std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
                 process_variables,
             SecondaryVariableCollection&& secondary_variables,
-            NumLib::NamedFunctionCaller&& named_function_caller);
+            NumLib::NamedFunctionCaller&& named_function_caller,
+            const bool use_monolithic_scheme = true);
 
     /// Preprocessing before starting assembly for new timestep.
     void preTimestep(GlobalVector const& x, const double t,
@@ -67,7 +68,8 @@ public:
 
     void initialize();
 
-    void setInitialConditions(const double t, GlobalVector& x);
+    void setInitialConditions(const unsigned pcs_id, const double t,
+                              GlobalVector& x);
 
     MathLib::MatrixSpecifications getMatrixSpecifications() const final;
 
@@ -75,11 +77,12 @@ public:
         CoupledSolutionsForStaggeredScheme* const coupled_solutions)
     {
         _coupled_solutions = coupled_solutions;
+
     }
 
+    bool isMonolithicSchemeUsed() const { return _use_monolithic_scheme; }
+    virtual void setCoupledTermForTheStaggeredSchemeToLocalAssemblers() {}
     void preAssemble(const double t, GlobalVector const& x) override final;
-
-    virtual void setCoupledSolutionsForStaggeredSchemeToLocalAssemblers() {}
     void assemble(const double t, GlobalVector const& x, GlobalMatrix& M,
                   GlobalMatrix& K, GlobalVector& b) final;
 
@@ -92,7 +95,9 @@ public:
     std::vector<NumLib::IndexValueVector<GlobalIndexType>> const*
     getKnownSolutions(double const t) const final
     {
-        return _boundary_conditions.getKnownSolutions(t);
+        const auto pcs_id =
+            (_coupled_solutions) ? _coupled_solutions->process_id : 0;
+        return _boundary_conditions[pcs_id].getKnownSolutions(t);
     }
 
     NumLib::LocalToGlobalIndexMap const& getDOFTable() const
@@ -104,7 +109,9 @@ public:
     std::vector<std::reference_wrapper<ProcessVariable>> const&
     getProcessVariables() const
     {
-        return _process_variables;
+        const auto pcs_id =
+            (_coupled_solutions) ? _coupled_solutions->process_id : 0;
+        return _process_variables[pcs_id];
     }
 
     SecondaryVariableCollection const& getSecondaryVariables() const
@@ -113,12 +120,15 @@ public:
     }
 
     // Get the solution of the previous time step.
-    virtual GlobalVector* getPreviousTimeStepSolution() const
+
+    virtual GlobalVector* getPreviousTimeStepSolution(
+        const int /*process_id*/) const
     {
         return nullptr;
     }
 
     // Used as a call back for CalculateSurfaceFlux process.
+
     virtual std::vector<double> getFlux(std::size_t /*element_id*/,
                                         MathLib::Point3d const& /*p*/,
                                         GlobalVector const& /*x*/) const
@@ -211,12 +221,16 @@ protected:
 
     VectorMatrixAssembler _global_assembler;
 
+    const bool _use_monolithic_scheme;
+
     /// Pointer to CoupledSolutionsForStaggeredScheme, which contains the
-    /// references to the
-    /// coupled processes and the references to the solutions of the coupled
-    /// processes.
+    /// references to the solutions of the coupled processes.
     CoupledSolutionsForStaggeredScheme* _coupled_solutions;
 
+    /// Set the solutions of the previous time step to the coupled term.
+    /// It only performs for the staggered scheme.
+    void setCoupledSolutionsOfPreviousTimeStep();
+
     /// Order of the integration method for element-wise integration.
     /// The Gauss-Legendre integration method and available orders is
     /// implemented in MathLib::GaussLegendre.
@@ -225,11 +239,21 @@ protected:
 private:
     GlobalSparsityPattern _sparsity_pattern;
 
-    /// Variables used by this process.
-    std::vector<std::reference_wrapper<ProcessVariable>> _process_variables;
-
-    BoundaryConditionCollection _boundary_conditions;
-    std::vector<std::unique_ptr<NodalSourceTerm>> _source_terms;
+    /// Variables used by this process.  For the monolithic scheme or a
+    /// single process, the size of the outer vector is one. For the
+    /// staggered scheme, the size of the outer vector is the number of the
+    /// coupled processes.
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        _process_variables;
+
+    /// Vector for boundary conditions. For the monolithic scheme or a
+    /// single process, the size of the vector is one. For the staggered
+    /// scheme, the size of vector is the number of the coupled processes.
+    std::vector<BoundaryConditionCollection> _boundary_conditions;
+
+    /// Vector for nodal source terms. The outer vector is for processes,
+    /// which has the same size as that for boundary conditions.
+    std::vector<std::vector<std::unique_ptr<NodalSourceTerm>>> _source_terms;
 
     ExtrapolatorData _extrapolator_data;
 };
diff --git a/ProcessLib/ProcessOutput.cpp b/ProcessLib/ProcessOutput.cpp
index 66c4b0abca2da8d431c28efb38bf17042bc6dc04..2141a45c824184874a8dffe6070beb9b0052f623 100644
--- a/ProcessLib/ProcessOutput.cpp
+++ b/ProcessLib/ProcessOutput.cpp
@@ -41,6 +41,7 @@ ProcessOutput::ProcessOutput(BaseLib::ConfigTree const& output_config)
 }
 
 void doProcessOutput(std::string const& file_name,
+                     bool const make_output,
                      bool const compress_output,
                      int const data_mode,
                      const double t,
@@ -71,6 +72,7 @@ void doProcessOutput(std::string const& file_name,
     int global_component_offset = 0;
     int global_component_offset_next = 0;
 
+    const auto number_of_dof_variables = dof_table.getNumberOfVariables();
     // primary variables
     for (int variable_id = 0;
          variable_id < static_cast<int>(process_variables.size());
@@ -78,11 +80,21 @@ void doProcessOutput(std::string const& file_name,
     {
         ProcessVariable& pv = process_variables[variable_id];
         int const n_components = pv.getNumberOfComponents();
-        global_component_offset = global_component_offset_next;
-        global_component_offset_next += n_components;
+        // If (number_of_dof_variables==1), the case is either the staggered
+        // scheme being applied or a single PDE being solved.
+        const int sub_meshset_id
+            = (number_of_dof_variables==1) ? 0 : variable_id;
+
+        if (number_of_dof_variables > 1)
+        {
+            global_component_offset = global_component_offset_next;
+            global_component_offset_next += n_components;
+        }
 
         if (output_variables.find(pv.getName()) == output_variables.cend())
+        {
             continue;
+        }
 
         already_output.insert(pv.getName());
 
@@ -95,8 +107,7 @@ void doProcessOutput(std::string const& file_name,
         for (int component_id = 0; component_id < num_comp; ++component_id)
         {
             auto const& mesh_subsets =
-                dof_table.getMeshSubsets(variable_id,
-                                                           component_id);
+                dof_table.getMeshSubsets(sub_meshset_id, component_id);
             for (auto const& mesh_subset : mesh_subsets)
             {
                 auto const mesh_id = mesh_subset->getMeshID();
@@ -215,10 +226,15 @@ void doProcessOutput(std::string const& file_name,
     (void)t;
 #endif // USE_PETSC
 
+    if (!make_output)
+    {
+        return;
+    }
+
     // Write output file
     DBUG("Writing output to \'%s\'.", file_name.c_str());
     MeshLib::IO::VtuInterface vtu_interface(&mesh, data_mode, compress_output);
     vtu_interface.writeToFile(file_name);
 }
 
-} // ProcessLib
+}  // namespace ProcessLib
diff --git a/ProcessLib/ProcessOutput.h b/ProcessLib/ProcessOutput.h
index ac1687d38f31a893376f8801335f5896e51fa7b1..bfd0e464965c61bf974e34731fa776cfc5ac065f 100644
--- a/ProcessLib/ProcessOutput.h
+++ b/ProcessLib/ProcessOutput.h
@@ -33,6 +33,7 @@ struct ProcessOutput final
 /// See Output::_output_file_data_mode documentation for the data_mode
 /// parameter.
 void doProcessOutput(std::string const& file_name,
+                     bool const make_output,
                      bool const compress_output,
                      int const data_mode,
                      const double t,
diff --git a/ProcessLib/RichardsComponentTransport/CreateRichardsComponentTransportProcess.cpp b/ProcessLib/RichardsComponentTransport/CreateRichardsComponentTransportProcess.cpp
index 05d54724a2ce5626d0a337c217f1d10c49ab4b9d..efb8122063c56db24060d0f19a31e88c98824029 100644
--- a/ProcessLib/RichardsComponentTransport/CreateRichardsComponentTransportProcess.cpp
+++ b/ProcessLib/RichardsComponentTransport/CreateRichardsComponentTransportProcess.cpp
@@ -36,18 +36,40 @@ std::unique_ptr<Process> createRichardsComponentTransportProcess(
 
     DBUG("Create RichardsComponentTransportProcess.");
 
+    auto const staggered_scheme =
+        //! \ogs_file_param{prj__processes__process__RichardsComponentTransport__coupling_scheme}
+        config.getConfigParameterOptional<std::string>("coupling_scheme");
+    const bool use_monolithic_scheme =
+        (staggered_scheme && (*staggered_scheme == "staggered")) ? false : true;
+
     // Process variable.
 
     //! \ogs_file_param{prj__processes__process__RichardsComponentTransport__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
-        variables, pv_config,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    if (use_monolithic_scheme)  // monolithic scheme.
+    {
+        auto per_process_variables = findProcessVariables(
+            variables, pv_config,
+            {
+            //! \ogs_file_param_special{prj__processes__process__RichardsComponentTransport__process_variables__concentration}
+            "concentration",
+            //! \ogs_file_param_special{prj__processes__process__RichardsComponentTransport__process_variables__pressure}
+            "pressure"});
+        process_variables.push_back(std::move(per_process_variables));
+    }
+    else  // staggered scheme.
+    {
+        using namespace std::string_literals;
+        for (auto const& variable_name : {"concentration"s, "pressure"s})
         {
-        //! \ogs_file_param_special{prj__processes__process__RichardsComponentTransport__process_variables__concentration}
-        "concentration",
-        //! \ogs_file_param_special{prj__processes__process__RichardsComponentTransport__process_variables__pressure}
-        "pressure"});
+            auto per_process_variables =
+                findProcessVariables(variables, pv_config, {variable_name});
+            process_variables.push_back(std::move(per_process_variables));
+        }
+    }
 
     auto const& porous_medium_configs =
         //! \ogs_file_param{prj__processes__process__RichardsComponentTransport__porous_medium}
@@ -146,7 +168,8 @@ std::unique_ptr<Process> createRichardsComponentTransportProcess(
     return std::make_unique<RichardsComponentTransportProcess>(
         mesh, std::move(jacobian_assembler), parameters, integration_order,
         std::move(process_variables), std::move(process_data),
-        std::move(secondary_variables), std::move(named_function_caller));
+        std::move(secondary_variables), std::move(named_function_caller),
+        use_monolithic_scheme);
 }
 
 }  // namespace RichardsComponentTransport
diff --git a/ProcessLib/RichardsComponentTransport/RichardsComponentTransportProcess.cpp b/ProcessLib/RichardsComponentTransport/RichardsComponentTransportProcess.cpp
index 00f5dcd9e537722832a40c0b6d2a96de8bf5390e..4071442b6d0539292e253d61ee9e6d592e785e46 100644
--- a/ProcessLib/RichardsComponentTransport/RichardsComponentTransportProcess.cpp
+++ b/ProcessLib/RichardsComponentTransport/RichardsComponentTransportProcess.cpp
@@ -22,13 +22,16 @@ RichardsComponentTransportProcess::RichardsComponentTransportProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     RichardsComponentTransportProcessData&& process_data,
     SecondaryVariableCollection&& secondary_variables,
-    NumLib::NamedFunctionCaller&& named_function_caller)
+    NumLib::NamedFunctionCaller&& named_function_caller,
+    bool const use_monolithic_scheme)
     : Process(mesh, std::move(jacobian_assembler), parameters,
               integration_order, std::move(process_variables),
-              std::move(secondary_variables), std::move(named_function_caller)),
+              std::move(secondary_variables), std::move(named_function_caller),
+              use_monolithic_scheme),
       _process_data(std::move(process_data))
 {
 }
diff --git a/ProcessLib/RichardsComponentTransport/RichardsComponentTransportProcess.h b/ProcessLib/RichardsComponentTransport/RichardsComponentTransportProcess.h
index 9e81aac5a3bbd297314d6e95e0fd3a22521cb878..357982028cda155eb251faac661f732bed3a52f1 100644
--- a/ProcessLib/RichardsComponentTransport/RichardsComponentTransportProcess.h
+++ b/ProcessLib/RichardsComponentTransport/RichardsComponentTransportProcess.h
@@ -107,11 +107,12 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         RichardsComponentTransportProcessData&& process_data,
         SecondaryVariableCollection&& secondary_variables,
-        NumLib::NamedFunctionCaller&& named_function_caller);
+        NumLib::NamedFunctionCaller&& named_function_caller,
+        bool const use_monolithic_scheme);
 
     //! \name ODESystem interface
     //! @{
diff --git a/ProcessLib/RichardsFlow/CreateRichardsFlowProcess.cpp b/ProcessLib/RichardsFlow/CreateRichardsFlowProcess.cpp
index ab7b1e4dbaeab0f872b0ef728ee49b67a800a7f7..cde5435bdaf1ba8060ebb1adacb0176281259f72 100644
--- a/ProcessLib/RichardsFlow/CreateRichardsFlowProcess.cpp
+++ b/ProcessLib/RichardsFlow/CreateRichardsFlowProcess.cpp
@@ -42,10 +42,13 @@ std::unique_ptr<Process> createRichardsFlowProcess(
     //! \ogs_file_param{prj__processes__process__RICHARDS_FLOW__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
+    auto per_process_variables = findProcessVariables(
         variables, pv_config,
         {//! \ogs_file_param_special{prj__processes__process__RICHARDS_FLOW__process_variables__process_variable}
          "process_variable"});
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    process_variables.push_back(std::move(per_process_variables));
 
     SecondaryVariableCollection secondary_variables;
 
diff --git a/ProcessLib/RichardsFlow/RichardsFlowProcess.cpp b/ProcessLib/RichardsFlow/RichardsFlowProcess.cpp
index 40740755ae204fc050115efb4b9d6695b0000cc8..f1384625f61c59cd2d2cc3541cf7717da232543c 100644
--- a/ProcessLib/RichardsFlow/RichardsFlowProcess.cpp
+++ b/ProcessLib/RichardsFlow/RichardsFlowProcess.cpp
@@ -22,7 +22,8 @@ RichardsFlowProcess::RichardsFlowProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     RichardsFlowProcessData&& process_data,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller,
diff --git a/ProcessLib/RichardsFlow/RichardsFlowProcess.h b/ProcessLib/RichardsFlow/RichardsFlowProcess.h
index 9f59e799854d7c6554c2704a0c578571ee788070..d8eda9d95230f722c5c8eb88afded761775ed766 100644
--- a/ProcessLib/RichardsFlow/RichardsFlowProcess.h
+++ b/ProcessLib/RichardsFlow/RichardsFlowProcess.h
@@ -30,7 +30,7 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         RichardsFlowProcessData&& process_data,
         SecondaryVariableCollection&& secondary_variables,
diff --git a/ProcessLib/SmallDeformation/CreateSmallDeformationProcess.cpp b/ProcessLib/SmallDeformation/CreateSmallDeformationProcess.cpp
index 8db7dc6dfc234d1d6b49bb569729339b8da57f20..69fa6498005ac9ad1a959bc9b533c9622045fbfe 100644
--- a/ProcessLib/SmallDeformation/CreateSmallDeformationProcess.cpp
+++ b/ProcessLib/SmallDeformation/CreateSmallDeformationProcess.cpp
@@ -43,24 +43,27 @@ createSmallDeformationProcess(
     //! \ogs_file_param{prj__processes__process__SMALL_DEFORMATION__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
+    auto per_process_variables = findProcessVariables(
         variables, pv_config,
         {//! \ogs_file_param_special{prj__processes__process__SMALL_DEFORMATION__process_variables__process_variable}
          "process_variable"});
 
     DBUG("Associate displacement with process variable \'%s\'.",
-         process_variables.back().get().getName().c_str());
+         per_process_variables.back().get().getName().c_str());
 
-    if (process_variables.back().get().getNumberOfComponents() !=
+    if (per_process_variables.back().get().getNumberOfComponents() !=
         DisplacementDim)
     {
         OGS_FATAL(
             "Number of components of the process variable '%s' is different "
             "from the displacement dimension: got %d, expected %d",
-            process_variables.back().get().getName().c_str(),
-            process_variables.back().get().getNumberOfComponents(),
+            per_process_variables.back().get().getName().c_str(),
+            per_process_variables.back().get().getNumberOfComponents(),
             DisplacementDim);
     }
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    process_variables.push_back(std::move(per_process_variables));
 
     // Constitutive relation.
     // read type;
diff --git a/ProcessLib/SmallDeformation/SmallDeformationProcess-impl.h b/ProcessLib/SmallDeformation/SmallDeformationProcess-impl.h
index a268bd9d3f6e38ae9e3277fdb86692e5e98b6ed5..399670070d9ad2498f39bda39d1ccbb646db2bfd 100644
--- a/ProcessLib/SmallDeformation/SmallDeformationProcess-impl.h
+++ b/ProcessLib/SmallDeformation/SmallDeformationProcess-impl.h
@@ -27,7 +27,8 @@ SmallDeformationProcess<DisplacementDim>::SmallDeformationProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     SmallDeformationProcessData<DisplacementDim>&& process_data,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller)
diff --git a/ProcessLib/SmallDeformation/SmallDeformationProcess.h b/ProcessLib/SmallDeformation/SmallDeformationProcess.h
index 5c95ceb390b963699d6522cb0bfd60b00cc70443..ebf72da22c61de3d2ee5cb8e6e8aeb4f568f1c86 100644
--- a/ProcessLib/SmallDeformation/SmallDeformationProcess.h
+++ b/ProcessLib/SmallDeformation/SmallDeformationProcess.h
@@ -29,7 +29,7 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         SmallDeformationProcessData<DisplacementDim>&& process_data,
         SecondaryVariableCollection&& secondary_variables,
diff --git a/ProcessLib/TES/CreateTESProcess.cpp b/ProcessLib/TES/CreateTESProcess.cpp
index be8eeb4d97638c778d5d86f7e01025f44d5ec616..72ee730ce4b609b83576ddc966a37bf002b4d652 100644
--- a/ProcessLib/TES/CreateTESProcess.cpp
+++ b/ProcessLib/TES/CreateTESProcess.cpp
@@ -32,7 +32,7 @@ std::unique_ptr<Process> createTESProcess(
     //! \ogs_file_param{prj__processes__process__TES__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
+    auto per_process_variables = findProcessVariables(
         variables, pv_config,
         {
         //! \ogs_file_param_special{prj__processes__process__TES__process_variables__fluid_pressure}
@@ -41,6 +41,9 @@ std::unique_ptr<Process> createTESProcess(
         "temperature",
         //! \ogs_file_param_special{prj__processes__process__TES__process_variables__vapour_mass_fraction}
         "vapour_mass_fraction"});
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    process_variables.push_back(std::move(per_process_variables));
 
     SecondaryVariableCollection secondary_variables;
 
diff --git a/ProcessLib/TES/TESProcess.cpp b/ProcessLib/TES/TESProcess.cpp
index 40d96dd594d8a218ac69e0c90483ebfe7f8f6fdd..790d784987306e277fdd49d5df86f41788951e44 100644
--- a/ProcessLib/TES/TESProcess.cpp
+++ b/ProcessLib/TES/TESProcess.cpp
@@ -21,7 +21,8 @@ TESProcess::TESProcess(
     std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller,
     const BaseLib::ConfigTree& config)
diff --git a/ProcessLib/TES/TESProcess.h b/ProcessLib/TES/TESProcess.h
index fb2a8062f5bfb46886f33dfa4137acd3d2611d27..309fc1615b9f1fc583333a8f3d1575cdbad9fbb2 100644
--- a/ProcessLib/TES/TESProcess.h
+++ b/ProcessLib/TES/TESProcess.h
@@ -34,7 +34,7 @@ public:
                std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
                std::vector<std::unique_ptr<ParameterBase>> const& parameters,
                unsigned const integration_order,
-               std::vector<std::reference_wrapper<ProcessVariable>>&&
+               std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
                    process_variables,
                SecondaryVariableCollection&& secondary_variables,
                NumLib::NamedFunctionCaller&& named_function_caller,
diff --git a/ProcessLib/ThermalTwoPhaseFlowWithPP/CreateThermalTwoPhaseFlowWithPPProcess.cpp b/ProcessLib/ThermalTwoPhaseFlowWithPP/CreateThermalTwoPhaseFlowWithPPProcess.cpp
index e8dd8ca61c0f783175347118e97f0cd1f1f2e12e..30cf2e9bff13bd9d9749ac0a03c09339456e0d7d 100644
--- a/ProcessLib/ThermalTwoPhaseFlowWithPP/CreateThermalTwoPhaseFlowWithPPProcess.cpp
+++ b/ProcessLib/ThermalTwoPhaseFlowWithPP/CreateThermalTwoPhaseFlowWithPPProcess.cpp
@@ -43,7 +43,7 @@ std::unique_ptr<Process> createThermalTwoPhaseFlowWithPPProcess(
     //! \ogs_file_param{prj__processes__process__TWOPHASE_FLOW_THERMAL__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
+    auto per_process_variables = findProcessVariables(
         variables, pv_config,
         {//! \ogs_file_param_special{prj__processes__process__TWOPHASE_FLOW_THERMAL__process_variables__gas_pressure}
          "gas_pressure",
@@ -51,6 +51,9 @@ std::unique_ptr<Process> createThermalTwoPhaseFlowWithPPProcess(
          "capillary_pressure",
          //! \ogs_file_param_special{prj__processes__process__TWOPHASE_FLOW_THERMAL__process_variables__temperature}
          "temperature"});
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    process_variables.push_back(std::move(per_process_variables));
 
     SecondaryVariableCollection secondary_variables;
 
diff --git a/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.cpp b/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.cpp
index 4acf0cdca1d03c7c5ac26e7c01c5f86dd0dca445..fde51a5ae41394fac4bacfb92d179f656ae5fbf1 100644
--- a/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.cpp
+++ b/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.cpp
@@ -29,7 +29,8 @@ ThermalTwoPhaseFlowWithPPProcess::ThermalTwoPhaseFlowWithPPProcess(
     std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     ThermalTwoPhaseFlowWithPPProcessData&& process_data,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller,
diff --git a/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.h b/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.h
index 4ede515c524b283a1bff1dfbad348d9cdab28b69..e58c243c6b741c9b6cd0b2054f9da7f18580f18b 100644
--- a/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.h
+++ b/ProcessLib/ThermalTwoPhaseFlowWithPP/ThermalTwoPhaseFlowWithPPProcess.h
@@ -42,7 +42,7 @@ public:
         std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         ThermalTwoPhaseFlowWithPPProcessData&& process_data,
         SecondaryVariableCollection&& secondary_variables,
diff --git a/ProcessLib/ThermoMechanics/CreateThermoMechanicsProcess.cpp b/ProcessLib/ThermoMechanics/CreateThermoMechanicsProcess.cpp
index e785b49dc3df2f49110c943e701804d65cccd015..4019fe48473b574b1b40b6809efeddaad41c723e 100644
--- a/ProcessLib/ThermoMechanics/CreateThermoMechanicsProcess.cpp
+++ b/ProcessLib/ThermoMechanics/CreateThermoMechanicsProcess.cpp
@@ -36,41 +36,68 @@ std::unique_ptr<Process> createThermoMechanicsProcess(
     config.checkConfigParameter("type", "THERMO_MECHANICS");
     DBUG("Create ThermoMechanicsProcess.");
 
+     auto const staggered_scheme =
+        //! \ogs_file_param{prj__processes__process__THERMO_MECHANICS__coupling_scheme}
+        config.getConfigParameterOptional<std::string>("coupling_scheme");
+    const bool use_monolithic_scheme =
+        (staggered_scheme && (*staggered_scheme == "staggered")) ? false : true;
+
     // Process variable.
 
     //! \ogs_file_param{prj__processes__process__THERMO_MECHANICS__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
-        variables, pv_config,
-        {//! \ogs_file_param_special{prj__processes__process__THERMO_MECHANICS__process_variables__temperature}
-         "temperature",
-         //! \ogs_file_param_special{prj__processes__process__THERMO_MECHANICS__process_variables__displacement}
-         "displacement"});
+    ProcessVariable* variable_T;
+    ProcessVariable* variable_u;
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    if (use_monolithic_scheme)  // monolithic scheme.
+    {
+        auto per_process_variables = findProcessVariables(
+            variables, pv_config,
+          {//! \ogs_file_param_special{prj__processes__process__THERMO_MECHANICS__process_variables__temperature}
+             "temperature",
+            //! \ogs_file_param_special{prj__processes__process__THERMO_MECHANICS__process_variables__displacement}
+             "displacement"});
+        variable_T = &per_process_variables[0].get();
+        variable_u = &per_process_variables[1].get();
+        process_variables.push_back(std::move(per_process_variables));
+    }
+    else  // staggered scheme.
+    {
+        using namespace std::string_literals;
+        for (auto const& variable_name : {"temperature"s, "displacement"s})
+        {
+            auto per_process_variables =
+                findProcessVariables(variables, pv_config, {variable_name});
+            process_variables.push_back(std::move(per_process_variables));
+        }
+        variable_T = &process_variables[0][0].get();
+        variable_u = &process_variables[1][0].get();
+    }
 
     DBUG("Associate displacement with process variable \'%s\'.",
-         process_variables[1].get().getName().c_str());
+         variable_u->getName().c_str());
 
-    if (process_variables[1].get().getNumberOfComponents() != DisplacementDim)
+    if (variable_u->getNumberOfComponents() != DisplacementDim)
     {
         OGS_FATAL(
             "Number of components of the process variable '%s' is different "
             "from the displacement dimension: got %d, expected %d",
-            process_variables[1].get().getName().c_str(),
-            process_variables[1].get().getNumberOfComponents(),
+            variable_u->getName().c_str(),
+            variable_u->getNumberOfComponents(),
             DisplacementDim);
     }
 
     DBUG("Associate temperature with process variable \'%s\'.",
-         process_variables[0].get().getName().c_str());
-    if (process_variables[0].get().getNumberOfComponents() != 1)
+         variable_T->getName().c_str());
+    if (variable_T->getNumberOfComponents() != 1)
     {
         OGS_FATAL(
-            "Temperature process variable '%s' is not a scalar variable but has "
+            "Pressure process variable '%s' is not a scalar variable but has "
             "%d components.",
-            process_variables[0].get().getName().c_str(),
-            process_variables[0].get().getNumberOfComponents(),
-            DisplacementDim);
+            variable_T->getName().c_str(),
+            variable_T->getNumberOfComponents());
     }
 
     // Constitutive relation.
@@ -179,7 +206,8 @@ std::unique_ptr<Process> createThermoMechanicsProcess(
     return std::make_unique<ThermoMechanicsProcess<DisplacementDim>>(
         mesh, std::move(jacobian_assembler), parameters, integration_order,
         std::move(process_variables), std::move(process_data),
-        std::move(secondary_variables), std::move(named_function_caller));
+        std::move(secondary_variables), std::move(named_function_caller),
+        use_monolithic_scheme);
 }
 
 template std::unique_ptr<Process> createThermoMechanicsProcess<2>(
diff --git a/ProcessLib/ThermoMechanics/ThermoMechanicsProcess-impl.h b/ProcessLib/ThermoMechanics/ThermoMechanicsProcess-impl.h
index 65ccd511029e28062939c8881b1ba3d7a2f245d2..5429bda2bebcb6011f385924a10876c28e2488a9 100644
--- a/ProcessLib/ThermoMechanics/ThermoMechanicsProcess-impl.h
+++ b/ProcessLib/ThermoMechanics/ThermoMechanicsProcess-impl.h
@@ -28,13 +28,16 @@ ThermoMechanicsProcess<DisplacementDim>::ThermoMechanicsProcess(
     std::unique_ptr<ProcessLib::AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     ThermoMechanicsProcessData<DisplacementDim>&& process_data,
     SecondaryVariableCollection&& secondary_variables,
-    NumLib::NamedFunctionCaller&& named_function_caller)
+    NumLib::NamedFunctionCaller&& named_function_caller,
+    bool const use_monolithic_scheme)
     : Process(mesh, std::move(jacobian_assembler), parameters,
               integration_order, std::move(process_variables),
-              std::move(secondary_variables), std::move(named_function_caller)),
+              std::move(secondary_variables), std::move(named_function_caller),
+              use_monolithic_scheme),
       _process_data(std::move(process_data))
 {
 }
diff --git a/ProcessLib/ThermoMechanics/ThermoMechanicsProcess.h b/ProcessLib/ThermoMechanics/ThermoMechanicsProcess.h
index e5a76051375259743b07ce49e8e8c237de02e7e5..f874a55752a2c25e23f48724fab3fdc9e7fa86bf 100644
--- a/ProcessLib/ThermoMechanics/ThermoMechanicsProcess.h
+++ b/ProcessLib/ThermoMechanics/ThermoMechanicsProcess.h
@@ -32,11 +32,12 @@ public:
             jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         ThermoMechanicsProcessData<DisplacementDim>&& process_data,
         SecondaryVariableCollection&& secondary_variables,
-        NumLib::NamedFunctionCaller&& named_function_caller);
+        NumLib::NamedFunctionCaller&& named_function_caller,
+        bool const use_monolithic_scheme);
 
     //! \name ODESystem interface
     //! @{
diff --git a/ProcessLib/TwoPhaseFlowWithPP/CreateTwoPhaseFlowWithPPProcess.cpp b/ProcessLib/TwoPhaseFlowWithPP/CreateTwoPhaseFlowWithPPProcess.cpp
index bf626795982f2cef21f626aad16d6853fb9d58bd..9ea7818ea6d2a8c14fd4c5e020e1ac7e9592c50b 100644
--- a/ProcessLib/TwoPhaseFlowWithPP/CreateTwoPhaseFlowWithPPProcess.cpp
+++ b/ProcessLib/TwoPhaseFlowWithPP/CreateTwoPhaseFlowWithPPProcess.cpp
@@ -40,12 +40,15 @@ std::unique_ptr<Process> createTwoPhaseFlowWithPPProcess(
     //! \ogs_file_param{prj__processes__process__TWOPHASE_FLOW_PP__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
+    auto per_process_variables = findProcessVariables(
         variables, pv_config,
         {//! \ogs_file_param_special{prj__processes__process__TWOPHASE_FLOW_PP__process_variables__gas_pressure}
          "gas_pressure",
          //! \ogs_file_param_special{prj__processes__process__TWOPHASE_FLOW_PP__process_variables__capillary_pressure}
          "capillary_pressure"});
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    process_variables.push_back(std::move(per_process_variables));
 
     SecondaryVariableCollection secondary_variables;
 
diff --git a/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.cpp b/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.cpp
index ef7cfdfcb05c83e7de1b821599cd9fa684645056..74cb7936d2508c206836cb9f55d2cbe6cd4a3f85 100644
--- a/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.cpp
+++ b/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.cpp
@@ -28,7 +28,8 @@ TwoPhaseFlowWithPPProcess::TwoPhaseFlowWithPPProcess(
     std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     TwoPhaseFlowWithPPProcessData&& process_data,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller,
diff --git a/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.h b/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.h
index 3d9c99895e1f44701df787f6a3ee504d3b0ec5e1..b585ecfa3d66cd1ab6b87c440b566bcee79ab7aa 100644
--- a/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.h
+++ b/ProcessLib/TwoPhaseFlowWithPP/TwoPhaseFlowWithPPProcess.h
@@ -40,7 +40,7 @@ public:
         std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         TwoPhaseFlowWithPPProcessData&& process_data,
         SecondaryVariableCollection&& secondary_variables,
diff --git a/ProcessLib/TwoPhaseFlowWithPrho/CreateTwoPhaseFlowWithPrhoProcess.cpp b/ProcessLib/TwoPhaseFlowWithPrho/CreateTwoPhaseFlowWithPrhoProcess.cpp
index 5beb4b9496475d631ebd0ace6b91d2beefab7d4b..1925ef03863b674775e102bbcccf99e0c87bdc2f 100644
--- a/ProcessLib/TwoPhaseFlowWithPrho/CreateTwoPhaseFlowWithPrhoProcess.cpp
+++ b/ProcessLib/TwoPhaseFlowWithPrho/CreateTwoPhaseFlowWithPrhoProcess.cpp
@@ -40,12 +40,15 @@ std::unique_ptr<Process> createTwoPhaseFlowWithPrhoProcess(
     //! \ogs_file_param{prj__processes__process__TWOPHASE_FLOW_PRHO__process_variables}
     auto const pv_config = config.getConfigSubtree("process_variables");
 
-    auto process_variables = findProcessVariables(
+    auto per_process_variables = findProcessVariables(
         variables, pv_config,
         {//! \ogs_file_param_special{prj__processes__process__TWOPHASE_FLOW_PRHO__process_variables__liquid_pressure}
          "liquid_pressure",
          //! \ogs_file_param_special{prj__processes__process__TWOPHASE_FLOW_PRHO__process_variables__overall_mass_density}
          "overall_mass_density"});
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>
+        process_variables;
+    process_variables.push_back(std::move(per_process_variables));
 
     SecondaryVariableCollection secondary_variables;
 
diff --git a/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.cpp b/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.cpp
index e0d800de4a30da3311b76e3c32ee9283f340e40f..bcc612704cc489a991ebd4c0a1096c64b1633742 100644
--- a/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.cpp
+++ b/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.cpp
@@ -28,7 +28,8 @@ TwoPhaseFlowWithPrhoProcess::TwoPhaseFlowWithPrhoProcess(
     std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
     std::vector<std::unique_ptr<ParameterBase>> const& parameters,
     unsigned const integration_order,
-    std::vector<std::reference_wrapper<ProcessVariable>>&& process_variables,
+    std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
+        process_variables,
     TwoPhaseFlowWithPrhoProcessData&& process_data,
     SecondaryVariableCollection&& secondary_variables,
     NumLib::NamedFunctionCaller&& named_function_caller,
diff --git a/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.h b/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.h
index 6bc0376633dcf2ebcbf9bf3247e03d53c1c25959..db524379a5bf3bb79b9b02fe024b782db88b03ad 100644
--- a/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.h
+++ b/ProcessLib/TwoPhaseFlowWithPrho/TwoPhaseFlowWithPrhoProcess.h
@@ -38,7 +38,7 @@ public:
         std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler,
         std::vector<std::unique_ptr<ParameterBase>> const& parameters,
         unsigned const integration_order,
-        std::vector<std::reference_wrapper<ProcessVariable>>&&
+        std::vector<std::vector<std::reference_wrapper<ProcessVariable>>>&&
             process_variables,
         TwoPhaseFlowWithPrhoProcessData&& process_data,
         SecondaryVariableCollection&& secondary_variables,
diff --git a/ProcessLib/UncoupledProcessesTimeLoop.cpp b/ProcessLib/UncoupledProcessesTimeLoop.cpp
index b2e6a015c2e60980ef3aa788991bf07a5cbc6a40..eda429e581fea219edb21608a63c164b7727317e 100644
--- a/ProcessLib/UncoupledProcessesTimeLoop.cpp
+++ b/ProcessLib/UncoupledProcessesTimeLoop.cpp
@@ -18,6 +18,7 @@
 #include "NumLib/TimeStepping/CreateTimeStepper.h"
 
 #include "MathLib/LinAlg/LinAlg.h"
+#include "CoupledSolutionsForStaggeredScheme.h"
 
 std::unique_ptr<ProcessLib::Output> createOutput(
     BaseLib::ConfigTree const& config, std::string const& output_directory)
@@ -109,8 +110,6 @@ struct SingleProcessData
         std::unique_ptr<NumLib::ConvergenceCriterion>&& conv_crit_,
         std::unique_ptr<NumLib::TimeDiscretization>&& time_disc_,
         Process& process_,
-        std::unordered_map<std::type_index, Process const&>&&
-            coupled_processes_,
         ProcessOutput&& process_output_);
 
     SingleProcessData(SingleProcessData&& spd);
@@ -136,8 +135,6 @@ struct SingleProcessData
     NumLib::InternalMatrixStorage* mat_strg = nullptr;
 
     Process& process;
-    /// Coupled processes.
-    std::unordered_map<std::type_index, Process const&> const coupled_processes;
     ProcessOutput process_output;
 };
 
@@ -148,7 +145,6 @@ SingleProcessData::SingleProcessData(
     std::unique_ptr<NumLib::ConvergenceCriterion>&& conv_crit_,
     std::unique_ptr<NumLib::TimeDiscretization>&& time_disc_,
     Process& process_,
-    std::unordered_map<std::type_index, Process const&>&& coupled_processes_,
     ProcessOutput&& process_output_)
     : timestepper(std::move(timestepper_)),
       nonlinear_solver_tag(NLTag),
@@ -157,7 +153,6 @@ SingleProcessData::SingleProcessData(
       conv_crit(std::move(conv_crit_)),
       time_disc(std::move(time_disc_)),
       process(process_),
-      coupled_processes(coupled_processes_),
       process_output(std::move(process_output_))
 {
 }
@@ -172,7 +167,6 @@ SingleProcessData::SingleProcessData(SingleProcessData&& spd)
       tdisc_ode_sys(std::move(spd.tdisc_ode_sys)),
       mat_strg(spd.mat_strg),
       process(spd.process),
-      coupled_processes(spd.coupled_processes),
       process_output(std::move(spd.process_output))
 {
     spd.mat_strg = nullptr;
@@ -237,7 +231,6 @@ std::unique_ptr<SingleProcessData> makeSingleProcessData(
     Process& process,
     std::unique_ptr<NumLib::TimeDiscretization>&& time_disc,
     std::unique_ptr<NumLib::ConvergenceCriterion>&& conv_crit,
-    std::unordered_map<std::type_index, Process const&>&& coupled_processes,
     ProcessOutput&& process_output)
 {
     using Tag = NumLib::NonlinearSolverTag;
@@ -249,7 +242,7 @@ std::unique_ptr<SingleProcessData> makeSingleProcessData(
         return std::make_unique<SingleProcessData>(
             std::move(timestepper), *nonlinear_solver_picard,
             std::move(conv_crit), std::move(time_disc), process,
-            std::move(coupled_processes), std::move(process_output));
+            std::move(process_output));
     }
     if (auto* nonlinear_solver_newton =
             dynamic_cast<NumLib::NonlinearSolver<Tag::Newton>*>(
@@ -258,7 +251,7 @@ std::unique_ptr<SingleProcessData> makeSingleProcessData(
         return std::make_unique<SingleProcessData>(
             std::move(timestepper), *nonlinear_solver_newton,
             std::move(conv_crit), std::move(time_disc), process,
-            std::move(coupled_processes), std::move(process_output));
+            std::move(process_output));
     }
 
     OGS_FATAL("Encountered unknown nonlinear solver type. Aborting");
@@ -300,45 +293,29 @@ std::vector<std::unique_ptr<SingleProcessData>> createPerProcessData(
             //! \ogs_file_param{prj__time_loop__processes__process__convergence_criterion}
             pcs_config.getConfigSubtree("convergence_criterion"));
 
-        auto const& coupled_process_tree
-            //! \ogs_file_param{prj__time_loop__processes__process__coupled_processes}
-            = pcs_config.getConfigSubtreeOptional("coupled_processes");
-        std::unordered_map<std::type_index, Process const&> coupled_processes;
-        if (coupled_process_tree)
-        {
-            for (
-                auto const cpl_pcs_name :
-                //! \ogs_file_param{prj__time_loop__processes__process__coupled_processes__coupled_process}
-                coupled_process_tree->getConfigParameterList<std::string>(
-                    "coupled_process"))
-            {
-                auto const& coupled_process = *BaseLib::getOrError(
-                    processes, cpl_pcs_name,
-                    "A process with the given name has not been defined.");
-
-                auto const inserted = coupled_processes.emplace(
-                    std::type_index(typeid(coupled_process)), coupled_process);
-                if (!inserted.second)
-                {  // insertion failed, i.e., key already exists
-                    OGS_FATAL("Coupled process `%s' already exists.",
-                              cpl_pcs_name.data());
-                }
-            }
-        }
-
         //! \ogs_file_param{prj__time_loop__processes__process__output}
         ProcessOutput process_output{pcs_config.getConfigSubtree("output")};
 
         per_process_data.emplace_back(makeSingleProcessData(
             std::move(timestepper), nl_slv, pcs, std::move(time_disc),
-            std::move(conv_crit), std::move(coupled_processes),
-            std::move(process_output)));
+            std::move(conv_crit), std::move(process_output)));
     }
 
     if (per_process_data.size() != processes.size())
-        OGS_FATAL(
-            "Some processes have not been configured to be solved by this time "
-            "time loop.");
+    {
+        if (processes.size() > 1)
+        {
+            OGS_FATAL(
+                "Some processes have not been configured to be solved by this "
+                " time loop.");
+        }
+        else
+        {
+            INFO(
+                "The equations of the coupled processes will be solved by the "
+                "staggered scheme.")
+        }
+    }
 
     return per_process_data;
 }
@@ -413,7 +390,7 @@ std::vector<GlobalVector*> setInitialConditions(
                 ode_sys.getMatrixSpecifications()));
 
         auto& x0 = *process_solutions[pcs_idx];
-        pcs.setInitialConditions(t0, x0);
+        pcs.setInitialConditions(pcs_idx, t0, x0);
         MathLib::LinAlg::finalizeAssembly(x0);
 
         time_disc.setInitialState(t0, x0);  // push IC
@@ -427,7 +404,8 @@ std::vector<GlobalVector*> setInitialConditions(
             setEquationSystem(nonlinear_solver, ode_sys, conv_crit, nl_tag);
             nonlinear_solver.assemble(x0);
             time_disc.pushState(
-                t0, x0, mat_strg);  // TODO: that might do duplicate work
+                t0, x0,
+                mat_strg);  // TODO: that might do duplicate work
         }
 
         ++pcs_idx;
@@ -436,7 +414,8 @@ std::vector<GlobalVector*> setInitialConditions(
     return process_solutions;
 }
 
-bool solveOneTimeStepOneProcess(GlobalVector& x, std::size_t const timestep,
+bool solveOneTimeStepOneProcess(const unsigned process_index,
+                                GlobalVector& x, std::size_t const timestep,
                                 double const t, double const delta_t,
                                 SingleProcessData& process_data,
                                 Output const& output_control)
@@ -462,7 +441,8 @@ bool solveOneTimeStepOneProcess(GlobalVector& x, std::size_t const timestep,
     auto const post_iteration_callback = [&](unsigned iteration,
                                              GlobalVector const& x) {
         output_control.doOutputNonlinearIteration(
-            process, process_data.process_output, timestep, t, x, iteration);
+            process, process_index, process_data.process_output, timestep, t,
+            x, iteration);
     };
 
     bool nonlinear_solver_succeeded =
@@ -488,56 +468,29 @@ UncoupledProcessesTimeLoop::UncoupledProcessesTimeLoop(
 
 bool UncoupledProcessesTimeLoop::setCoupledSolutions()
 {
-    // Do nothing if process are not coupled
-    if ((!_global_coupling_conv_crit) || _global_coupling_max_iterations == 1)
+    // All _per_process_data share one process
+    const bool use_monolithic_scheme =
+        _per_process_data[0]->process.isMonolithicSchemeUsed();
+    if (use_monolithic_scheme)
+    {
         return false;
+    }
 
-    unsigned pcs_idx = 0;
     _solutions_of_coupled_processes.reserve(_per_process_data.size());
-    for (auto& spd : _per_process_data)
+    for (unsigned pcs_idx = 0; pcs_idx < _per_process_data.size(); pcs_idx++)
     {
-        auto const& coupled_processes = spd->coupled_processes;
-        std::unordered_map<std::type_index, GlobalVector const&> coupled_xs;
-        for (auto const& coupled_process_pair : coupled_processes)
-        {
-            ProcessLib::Process const& coupled_process =
-                coupled_process_pair.second;
-            auto const found_item = std::find_if(
-                _per_process_data.begin(),
-                _per_process_data.end(),
-                [&coupled_process](
-                    std::unique_ptr<SingleProcessData> const& item) {
-                    auto const& item_process = item->process;
-                    return std::type_index(typeid(coupled_process)) ==
-                           std::type_index(typeid(item_process));
-                });
-
-            if (found_item != _per_process_data.end())
-            {
-                // Id of the coupled process:
-                const std::size_t c_id =
-                    std::distance(_per_process_data.begin(), found_item);
-
-                BaseLib::insertIfTypeIndexKeyUniqueElseError(
-                    coupled_xs, coupled_process_pair.first,
-                    *_process_solutions[c_id], "global_coupled_x");
-            }
-        }
-        _solutions_of_coupled_processes.emplace_back(coupled_xs);
-
         auto const& x = *_process_solutions[pcs_idx];
+        _solutions_of_coupled_processes.emplace_back(x);
 
         // Create a vector to store the solution of the last coupling iteration
-        auto& x_coupling0 = NumLib::GlobalVectorProvider::provider.getVector(x);
-        MathLib::LinAlg::copy(x, x_coupling0);
+        auto& x0 = NumLib::GlobalVectorProvider::provider.getVector(x);
+        MathLib::LinAlg::copy(x, x0);
 
         // append a solution vector of suitable size
-        _solutions_of_last_cpl_iteration.emplace_back(&x_coupling0);
-
-        ++pcs_idx;
-    }  // end of for (auto& spd : _per_process_data)
+        _solutions_of_last_cpl_iteration.emplace_back(&x0);
+    }
 
-    return true;
+    return true;  // use staggered scheme.
 }
 
 double UncoupledProcessesTimeLoop::computeTimeStepping(
@@ -575,7 +528,9 @@ double UncoupledProcessesTimeLoop::computeTimeStepping(
                 : 0.;
 
         if (!ppd.nonlinear_solver_converged)
+        {
             timestepper->setAcceptedOrNot(false);
+        }
 
         if (!timestepper->next(solution_error) &&
             // In case of FixedTimeStepping, which makes timestepper->next(...)
@@ -632,7 +587,9 @@ double UncoupledProcessesTimeLoop::computeTimeStepping(
         timestepper->resetCurrentTimeStep(dt);
 
         if (ppd.skip_time_stepping)
+        {
             continue;
+        }
 
         if (t == timestepper->begin())
         {
@@ -747,16 +704,20 @@ bool UncoupledProcessesTimeLoop::loop()
         const double prev_dt = dt;
 
         const std::size_t timesteps = accepted_steps + 1;
-        // TODO, input option for time unit.
+        // TODO(wenqing): , input option for time unit.
         INFO("=== Time stepping at step #%u and time %g with step size %g",
              timesteps, t, dt);
 
         if (is_staggered_coupling)
+        {
             nonlinear_solver_succeeded =
                 solveCoupledEquationSystemsByStaggeredScheme(t, dt, timesteps);
+        }
         else
+        {
             nonlinear_solver_succeeded =
                 solveUncoupledEquationSystems(t, dt, timesteps);
+        }
 
         INFO("[time] Time step #%u took %g s.", timesteps,
              time_timestep.elapsed());
@@ -765,7 +726,9 @@ bool UncoupledProcessesTimeLoop::loop()
 
         if (t + dt > _end_time ||
             t + std::numeric_limits<double>::epsilon() > _end_time)
+        {
             break;
+        }
 
         if (dt < std::numeric_limits<double>::epsilon())
         {
@@ -823,7 +786,7 @@ static std::string nonlinear_fixed_dt_fails_info =
 bool UncoupledProcessesTimeLoop::solveUncoupledEquationSystems(
     const double t, const double dt, const std::size_t timestep_id)
 {
-    // TODO use process name
+    // TODO(wenqing): use process name
     unsigned pcs_idx = 0;
     for (auto& spd : _per_process_data)
     {
@@ -842,7 +805,7 @@ bool UncoupledProcessesTimeLoop::solveUncoupledEquationSystems(
         pcs.preTimestep(x, t, dt, pcs_idx);
 
         const auto nonlinear_solver_succeeded =
-            solveOneTimeStepOneProcess(x, timestep_id, t, dt, *spd, *_output);
+            solveOneTimeStepOneProcess(pcs_idx, x, timestep_id, t, dt, *spd, *_output);
         spd->nonlinear_solver_converged = nonlinear_solver_succeeded;
         pcs.postTimestep(x);
         pcs.computeSecondaryVariable(t, x);
@@ -857,7 +820,7 @@ bool UncoupledProcessesTimeLoop::solveUncoupledEquationSystems(
                 timestep_id, t, pcs_idx);
 
             // save unsuccessful solution
-            _output->doOutputAlways(pcs, spd->process_output, timestep_id, t,
+            _output->doOutputAlways(pcs, pcs_idx, spd->process_output, timestep_id, t,
                                     x);
 
             if (!spd->timestepper->isSolutionErrorComputationNeeded())
@@ -868,7 +831,7 @@ bool UncoupledProcessesTimeLoop::solveUncoupledEquationSystems(
             return false;
         }
 
-        _output->doOutput(pcs, spd->process_output, timestep_id, t, x);
+        _output->doOutput(pcs, pcs_idx, spd->process_output, timestep_id, t, x);
 
         ++pcs_idx;
     }  // end of for (auto& spd : _per_process_data)
@@ -890,7 +853,7 @@ bool UncoupledProcessesTimeLoop::solveCoupledEquationSystemsByStaggeredScheme(
          global_coupling_iteration < _global_coupling_max_iterations;
          global_coupling_iteration++, _global_coupling_conv_crit->reset())
     {
-        // TODO use process name
+        // TODO(wenqing): use process name
         bool nonlinear_solver_succeeded = true;
         coupling_iteration_converged = true;
         unsigned pcs_idx = 0;
@@ -917,14 +880,13 @@ bool UncoupledProcessesTimeLoop::solveCoupledEquationSystemsByStaggeredScheme(
             }
 
             CoupledSolutionsForStaggeredScheme coupled_solutions(
-                spd->coupled_processes,
-                _solutions_of_coupled_processes[pcs_idx], dt);
+                _solutions_of_coupled_processes, dt, pcs_idx);
 
             spd->process.setCoupledSolutionsForStaggeredScheme(
                 &coupled_solutions);
 
             const auto nonlinear_solver_succeeded = solveOneTimeStepOneProcess(
-                x, timestep_id, t, dt, *spd, *_output);
+                pcs_idx, x, timestep_id, t, dt, *spd, *_output);
             spd->nonlinear_solver_converged = nonlinear_solver_succeeded;
 
             INFO(
@@ -941,7 +903,8 @@ bool UncoupledProcessesTimeLoop::solveCoupledEquationSystemsByStaggeredScheme(
                     timestep_id, t, pcs_idx);
 
                 // save unsuccessful solution
-                _output->doOutputAlways(spd->process, spd->process_output,
+                _output->doOutputAlways(spd->process, pcs_idx,
+                                        spd->process_output,
                                         timestep_id, t, x);
 
                 if (!spd->timestepper->isSolutionErrorComputationNeeded())
@@ -960,15 +923,20 @@ bool UncoupledProcessesTimeLoop::solveCoupledEquationSystemsByStaggeredScheme(
                 coupling_iteration_converged &&
                 _global_coupling_conv_crit->isSatisfied();
 
+            MathLib::LinAlg::copy(x, x_old);
+
             if (coupling_iteration_converged)
+            {
                 break;
-            MathLib::LinAlg::copy(x, x_old);
+            }
 
             ++pcs_idx;
         }  // end of for (auto& spd : _per_process_data)
 
         if (coupling_iteration_converged)
+        {
             break;
+        }
 
         if (!nonlinear_solver_succeeded)
         {
@@ -1031,24 +999,27 @@ void UncoupledProcessesTimeLoop::outputSolutions(
         auto const& x = *_process_solutions[pcs_idx];
 
         if (output_initial_condition)
+        {
             pcs.preTimestep(x, _start_time,
                             spd->timestepper->getTimeStep().dt(), pcs_idx);
+        }
         if (is_staggered_coupling)
         {
             CoupledSolutionsForStaggeredScheme coupled_solutions(
-                spd->coupled_processes,
-                _solutions_of_coupled_processes[pcs_idx], 0.0);
+                _solutions_of_coupled_processes, 0.0, pcs_idx);
 
             spd->process.setCoupledSolutionsForStaggeredScheme(
                 &coupled_solutions);
-            spd->process
-                .setCoupledSolutionsForStaggeredSchemeToLocalAssemblers();
-            (output_object.*output_class_member)(pcs, spd->process_output,
+            spd->process.setCoupledTermForTheStaggeredSchemeToLocalAssemblers();
+            (output_object.*output_class_member)(pcs, pcs_idx,
+                                                 spd->process_output,
                                                  timestep, t, x);
         }
         else
-            (output_object.*output_class_member)(pcs, spd->process_output,
-                                                 timestep, t, x);
+        {
+            (output_object.*output_class_member)(
+                pcs, pcs_idx, spd->process_output, timestep, t, x);
+        }
 
         ++pcs_idx;
     }
@@ -1057,10 +1028,14 @@ void UncoupledProcessesTimeLoop::outputSolutions(
 UncoupledProcessesTimeLoop::~UncoupledProcessesTimeLoop()
 {
     for (auto* x : _process_solutions)
+    {
         NumLib::GlobalVectorProvider::provider.releaseVector(*x);
+    }
 
     for (auto* x : _solutions_of_last_cpl_iteration)
+    {
         NumLib::GlobalVectorProvider::provider.releaseVector(*x);
+    }
 }
 
 }  // namespace ProcessLib
diff --git a/ProcessLib/UncoupledProcessesTimeLoop.h b/ProcessLib/UncoupledProcessesTimeLoop.h
index 6f1d7e03d7df77f46b4484c947fe7bb457bd75e7..0125ba1d6586697f9cc4bebb81ac86cbd1b5175e 100644
--- a/ProcessLib/UncoupledProcessesTimeLoop.h
+++ b/ProcessLib/UncoupledProcessesTimeLoop.h
@@ -10,9 +10,7 @@
 #pragma once
 
 #include <memory>
-#include <unordered_map>
-#include <typeinfo>
-#include <typeindex>
+#include <functional>
 
 #include <logog/include/logog.hpp>
 
@@ -77,11 +75,11 @@ private:
     std::unique_ptr<NumLib::ConvergenceCriterion> _global_coupling_conv_crit;
 
     /**
-     *  Vector of solutions of coupled processes of processes.
+     *  Vector of solutions of the coupled processes.
      *  Each vector element stores the references of the solution vectors
      *  (stored in _process_solutions) of the coupled processes of a process.
      */
-    std::vector<std::unordered_map<std::type_index, GlobalVector const&>>
+    std::vector<std::reference_wrapper<GlobalVector const>>
         _solutions_of_coupled_processes;
 
     /// Solutions of the previous coupling iteration for the convergence
diff --git a/ProcessLib/VectorMatrixAssembler.cpp b/ProcessLib/VectorMatrixAssembler.cpp
index 690c9bcc23ce11b2615169a55e970cbd5cf65bbf..a78ec63364b2bcdc87c1a7ed8458d904f4065cc0 100644
--- a/ProcessLib/VectorMatrixAssembler.cpp
+++ b/ProcessLib/VectorMatrixAssembler.cpp
@@ -15,40 +15,11 @@
 #include "MathLib/LinAlg/Eigen/EigenMapTools.h"
 #include "LocalAssemblerInterface.h"
 
+#include "CoupledSolutionsForStaggeredScheme.h"
 #include "Process.h"
 
 namespace ProcessLib
 {
-static std::unordered_map<std::type_index, const std::vector<double>>
-getPreviousLocalSolutionsOfCoupledProcesses(
-    const CoupledSolutionsForStaggeredScheme& coupled_solutions,
-    const std::vector<GlobalIndexType>& indices)
-{
-    std::unordered_map<std::type_index, const std::vector<double>>
-        local_coupled_xs0;
-
-    for (auto const& coupled_process_pair : coupled_solutions.coupled_processes)
-    {
-        auto const& coupled_pcs = coupled_process_pair.second;
-        auto const prevous_time_x = coupled_pcs.getPreviousTimeStepSolution();
-        if (prevous_time_x)
-        {
-            auto const local_coupled_x0 = prevous_time_x->get(indices);
-            BaseLib::insertIfTypeIndexKeyUniqueElseError(
-                local_coupled_xs0, coupled_process_pair.first, local_coupled_x0,
-                "local_coupled_x0");
-        }
-        else
-        {
-            const std::vector<double> local_coupled_x0;
-            BaseLib::insertIfTypeIndexKeyUniqueElseError(
-                local_coupled_xs0, coupled_process_pair.first, local_coupled_x0,
-                "local_coupled_x0");
-        }
-    }
-    return local_coupled_xs0;
-}
-
 VectorMatrixAssembler::VectorMatrixAssembler(
     std::unique_ptr<AbstractJacobianAssembler>&& jacobian_assembler)
     : _jacobian_assembler(std::move(jacobian_assembler))
@@ -70,42 +41,32 @@ void VectorMatrixAssembler::assemble(
     const std::size_t mesh_item_id, LocalAssemblerInterface& local_assembler,
     const NumLib::LocalToGlobalIndexMap& dof_table, const double t,
     const GlobalVector& x, GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b,
-    const CoupledSolutionsForStaggeredScheme* coupled_solutions)
+    const CoupledSolutionsForStaggeredScheme* cpl_xs)
 {
     auto const indices = NumLib::getIndices(mesh_item_id, dof_table);
-    auto const local_x = x.get(indices);
 
     _local_M_data.clear();
     _local_K_data.clear();
     _local_b_data.clear();
 
-    if (!coupled_solutions)
+    if (cpl_xs == nullptr)
     {
+        auto const local_x = x.get(indices);
         local_assembler.assemble(t, local_x, _local_M_data, _local_K_data,
                                  _local_b_data);
     }
     else
     {
-        auto local_coupled_xs0 = getPreviousLocalSolutionsOfCoupledProcesses(
-            *coupled_solutions, indices);
-        auto local_coupled_xs = getCurrentLocalSolutionsOfCoupledProcesses(
-            coupled_solutions->coupled_xs, indices);
-
-        if (local_coupled_xs0.empty() || local_coupled_xs.empty())
-        {
-            local_assembler.assemble(t, local_x, _local_M_data, _local_K_data,
-                                     _local_b_data);
-        }
-        else
-        {
-            ProcessLib::LocalCoupledSolutions local_coupled_solutions(
-                coupled_solutions->dt, coupled_solutions->coupled_processes,
-                std::move(local_coupled_xs0), std::move(local_coupled_xs));
-
-            local_assembler.assembleWithCoupledTerm(
-                t, local_x, _local_M_data, _local_K_data, _local_b_data,
-                local_coupled_solutions);
-        }
+        auto local_coupled_xs0 = getPreviousLocalSolutions(*cpl_xs, indices);
+        auto local_coupled_xs = getCurrentLocalSolutions(*cpl_xs, indices);
+
+        ProcessLib::LocalCoupledSolutions local_coupled_solutions(
+            cpl_xs->dt, cpl_xs->process_id, std::move(local_coupled_xs0),
+            std::move(local_coupled_xs));
+
+        local_assembler.assembleWithCoupledTerm(t, _local_M_data, _local_K_data,
+                                                _local_b_data,
+                                                local_coupled_solutions);
     }
 
     auto const num_r_c = indices.size();
@@ -134,10 +95,9 @@ void VectorMatrixAssembler::assembleWithJacobian(
     NumLib::LocalToGlobalIndexMap const& dof_table, const double t,
     GlobalVector const& x, GlobalVector const& xdot, const double dxdot_dx,
     const double dx_dx, GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b,
-    GlobalMatrix& Jac, const CoupledSolutionsForStaggeredScheme* coupled_solutions)
+    GlobalMatrix& Jac, const CoupledSolutionsForStaggeredScheme* cpl_xs)
 {
     auto const indices = NumLib::getIndices(mesh_item_id, dof_table);
-    auto const local_x = x.get(indices);
     auto const local_xdot = xdot.get(indices);
 
     _local_M_data.clear();
@@ -145,35 +105,26 @@ void VectorMatrixAssembler::assembleWithJacobian(
     _local_b_data.clear();
     _local_Jac_data.clear();
 
-    if (!coupled_solutions)
+    if (cpl_xs == nullptr)
     {
+        auto const local_x = x.get(indices);
         _jacobian_assembler->assembleWithJacobian(
             local_assembler, t, local_x, local_xdot, dxdot_dx, dx_dx,
             _local_M_data, _local_K_data, _local_b_data, _local_Jac_data);
     }
     else
     {
-        auto local_coupled_xs0 = getPreviousLocalSolutionsOfCoupledProcesses(
-            *coupled_solutions, indices);
-        auto local_coupled_xs = getCurrentLocalSolutionsOfCoupledProcesses(
-            coupled_solutions->coupled_xs, indices);
-        if (local_coupled_xs0.empty() || local_coupled_xs.empty())
-        {
-            _jacobian_assembler->assembleWithJacobian(
-                local_assembler, t, local_x, local_xdot, dxdot_dx, dx_dx,
-                _local_M_data, _local_K_data, _local_b_data, _local_Jac_data);
-        }
-        else
-        {
-            ProcessLib::LocalCoupledSolutions local_coupled_solutions(
-                coupled_solutions->dt, coupled_solutions->coupled_processes,
-                std::move(local_coupled_xs0), std::move(local_coupled_xs));
-
-            _jacobian_assembler->assembleWithJacobianAndCoupling(
-                local_assembler, t, local_x, local_xdot, dxdot_dx, dx_dx,
-                _local_M_data, _local_K_data, _local_b_data, _local_Jac_data,
-                local_coupled_solutions);
-        }
+        auto local_coupled_xs0 = getPreviousLocalSolutions(*cpl_xs, indices);
+        auto local_coupled_xs = getCurrentLocalSolutions(*cpl_xs, indices);
+
+        ProcessLib::LocalCoupledSolutions local_coupled_solutions(
+            cpl_xs->dt, cpl_xs->process_id, std::move(local_coupled_xs0),
+            std::move(local_coupled_xs));
+
+        _jacobian_assembler->assembleWithJacobianAndCoupling(
+            local_assembler, t, local_xdot, dxdot_dx, dx_dx, _local_M_data,
+            _local_K_data, _local_b_data, _local_Jac_data,
+            local_coupled_solutions);
     }
 
     auto const num_r_c = indices.size();
@@ -209,4 +160,4 @@ void VectorMatrixAssembler::assembleWithJacobian(
     }
 }
 
-}  // ProcessLib
+}  // namespace ProcessLib
diff --git a/ProcessLib/VectorMatrixAssembler.h b/ProcessLib/VectorMatrixAssembler.h
index a1203361b4d6084aec87bfd4fca0aa54b22aaa6d..38b7b855a7f29d89dd8c8ead8eae71cd6ef1f40c 100644
--- a/ProcessLib/VectorMatrixAssembler.h
+++ b/ProcessLib/VectorMatrixAssembler.h
@@ -21,6 +21,8 @@ class LocalToGlobalIndexMap;
 
 namespace ProcessLib
 {
+struct CoupledSolutionsForStaggeredScheme;
+
 class LocalAssemblerInterface;
 
 //! Utility class used to assemble global matrices and vectors.
@@ -45,7 +47,7 @@ public:
                   NumLib::LocalToGlobalIndexMap const& dof_table,
                   double const t, GlobalVector const& x, GlobalMatrix& M,
                   GlobalMatrix& K, GlobalVector& b,
-                  const CoupledSolutionsForStaggeredScheme* coupled_solutions);
+                  const CoupledSolutionsForStaggeredScheme* cpl_xs);
 
     //! Assembles \c M, \c K, \c b, and the Jacobian \c Jac of the residual.
     //! \note The Jacobian must be assembled.
@@ -57,7 +59,7 @@ public:
                               const double dx_dx, GlobalMatrix& M,
                               GlobalMatrix& K, GlobalVector& b,
                               GlobalMatrix& Jac,
-                              const CoupledSolutionsForStaggeredScheme* coupled_solutions);
+                              const CoupledSolutionsForStaggeredScheme* cpl_xs);
 
 private:
     // temporary data only stored here in order to avoid frequent memory
diff --git a/Tests/lfs-data/Parabolic/HT/ConstViscosity/square_5500x5500.prj b/Tests/lfs-data/Parabolic/HT/ConstViscosity/square_5500x5500.prj
index 81faa39a19b70a62a3b02a9b7abf2029fbd169d1..03740f603c4022c9dd7ebd635360c7a44f56595a 100644
--- a/Tests/lfs-data/Parabolic/HT/ConstViscosity/square_5500x5500.prj
+++ b/Tests/lfs-data/Parabolic/HT/ConstViscosity/square_5500x5500.prj
@@ -44,12 +44,13 @@
                 </porous_medium>
             </porous_medium>
             <density_solid>rho_solid</density_solid>
-            <fluid_reference_density>rho_fluid</fluid_reference_density>
             <specific_heat_capacity_solid>c_solid</specific_heat_capacity_solid>
             <thermal_conductivity_solid>lambda_solid</thermal_conductivity_solid>
             <thermal_conductivity_fluid>lambda_fluid</thermal_conductivity_fluid>
-            <thermal_dispersivity_longitudinal>alpha_l</thermal_dispersivity_longitudinal>
-            <thermal_dispersivity_transversal>alpha_t</thermal_dispersivity_transversal>
+            <thermal_dispersivity>
+                <longitudinal>alpha_l</longitudinal>
+                <transversal>alpha_t</transversal>
+            </thermal_dispersivity>
             <specific_body_force>0 -9.81</specific_body_force>
             <secondary_variables>
                 <secondary_variable type="static" internal_name="darcy_velocity" output_name="darcy_velocity"/>
diff --git a/Tests/lfs-data/Parabolic/HT/StaggeredCoupling/ConstViscosity/square_5500x5500.gml b/Tests/lfs-data/Parabolic/HT/StaggeredCoupling/ConstViscosity/square_5500x5500.gml
new file mode 100644
index 0000000000000000000000000000000000000000..032286fc8f97784b6f71509a15840125436e14fe
--- /dev/null
+++ b/Tests/lfs-data/Parabolic/HT/StaggeredCoupling/ConstViscosity/square_5500x5500.gml
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b35708cfa327cd080308516153e5d97cfca995b75378c47462f9b64a63b21d5
+size 1044
diff --git a/Tests/lfs-data/Parabolic/HT/StaggeredCoupling/ConstViscosity/square_5500x5500.vtu b/Tests/lfs-data/Parabolic/HT/StaggeredCoupling/ConstViscosity/square_5500x5500.vtu
new file mode 100644
index 0000000000000000000000000000000000000000..3df5c5201c9e653cb43c9809d103ffff3390aacc
--- /dev/null
+++ b/Tests/lfs-data/Parabolic/HT/StaggeredCoupling/ConstViscosity/square_5500x5500.vtu
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f5d32a50ce7d3eddd0f4f4b1f33de2230d2e49a24aa1b55511222e3cf038f68
+size 138816
diff --git a/Tests/lfs-data/Parabolic/HT/StaggeredCoupling/ConstViscosity/square_5500x5500_staggered_scheme.prj b/Tests/lfs-data/Parabolic/HT/StaggeredCoupling/ConstViscosity/square_5500x5500_staggered_scheme.prj
new file mode 100644
index 0000000000000000000000000000000000000000..fe303865d284a2b490b21ad3db57d5d5b100fa6a
--- /dev/null
+++ b/Tests/lfs-data/Parabolic/HT/StaggeredCoupling/ConstViscosity/square_5500x5500_staggered_scheme.prj
@@ -0,0 +1,480 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<OpenGeoSysProject>
+    <mesh>square_5500x5500.vtu</mesh>
+    <geometry>square_5500x5500.gml</geometry>
+    <processes>
+        <process>
+            <name>ConstViscosityThermalConvection</name>
+            <type>HT</type>
+            <coupling_scheme>staggered</coupling_scheme>
+            <integration_order>2</integration_order>
+            <process_variables>
+                <temperature>T</temperature>
+                <pressure>p</pressure>
+            </process_variables>
+            <fluid>
+                <density>
+                    <type>TemperatureDependent</type>
+                    <rho0>1000</rho0>
+                    <temperature0>20</temperature0>
+                    <beta>4.3e-4</beta>
+                </density>
+                <viscosity>
+                    <type>Constant</type>
+                    <value>1.0e-3</value>
+                </viscosity>
+                <specific_heat_capacity>
+                    <type>Constant</type>
+                    <value>4200</value>
+                </specific_heat_capacity>
+            </fluid>
+            <porous_medium>
+                <porous_medium id="0">
+                    <permeability>
+                        <permeability_tensor_entries>kappa1</permeability_tensor_entries>
+                        <type>Constant</type>
+                    </permeability>
+                    <porosity>
+                        <type>Constant</type>
+                        <porosity_parameter>constant_porosity_parameter</porosity_parameter>
+                    </porosity>
+                    <storage>
+                        <type>Constant</type>
+                        <value>0.0</value>
+                    </storage>
+                </porous_medium>
+            </porous_medium>
+            <density_solid>rho_solid</density_solid>
+            <specific_heat_capacity_solid>c_solid</specific_heat_capacity_solid>
+            <thermal_conductivity_solid>lambda_solid</thermal_conductivity_solid>
+            <thermal_conductivity_fluid>lambda_fluid</thermal_conductivity_fluid>
+            <specific_body_force>0 -9.81</specific_body_force>
+            <secondary_variables>
+                <secondary_variable type="static" internal_name="darcy_velocity" output_name="darcy_velocity"/>
+            </secondary_variables>
+        </process>
+    </processes>
+    <time_loop>
+        <global_process_coupling>
+            <max_iter> 6 </max_iter>
+            <convergence_criterion>
+                <type>Residual</type>
+                <norm_type>NORM2</norm_type>
+                <reltol>1.e-10</reltol>
+            </convergence_criterion>
+        </global_process_coupling>
+
+        <processes>
+            <process ref="ConstViscosityThermalConvection">
+                <nonlinear_solver>picard_T</nonlinear_solver>
+                <convergence_criterion>
+                    <type>DeltaX</type>
+                    <norm_type>NORM2</norm_type>
+                    <abstol>1.e-3</abstol>
+                </convergence_criterion>
+                <time_discretization>
+                    <type>BackwardEuler</type>
+                </time_discretization>
+                <output>
+                    <variables>
+                        <variable>T</variable>
+                        <variable>darcy_velocity</variable>
+                    </variables>
+                </output>
+                <time_stepping>
+                    <type>FixedTimeStepping</type>
+                    <t_initial> 0.0 </t_initial>
+                    <t_end> 5e10 </t_end>
+                    <timesteps>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e-7</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e-5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e-3</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e-1</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>10</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>100</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e4</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e4</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>4e4</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>8e4</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>4e5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>8e5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e6</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e6</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>4e6</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>8e6</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e7</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e7</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>4e7</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e8</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>100</repeat>
+                            <delta_t>4e8</delta_t>
+                        </pair>
+                    </timesteps>
+                </time_stepping>
+            </process>
+            <process ref="ConstViscosityThermalConvection">
+                <nonlinear_solver>picard_H</nonlinear_solver>
+                <convergence_criterion>
+                    <type>DeltaX</type>
+                    <norm_type>NORM2</norm_type>
+                    <abstol>1.e-3</abstol>
+                </convergence_criterion>
+                <time_discretization>
+                    <type>BackwardEuler</type>
+                </time_discretization>
+                <output>
+                    <variables>
+                        <variable>p</variable>
+                    </variables>
+                </output>
+                <time_stepping>
+                    <type>FixedTimeStepping</type>
+                    <t_initial> 0.0 </t_initial>
+                    <t_end> 5e10 </t_end>
+                    <timesteps>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e-7</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e-5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e-3</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e-1</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>10</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>100</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e4</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e4</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>4e4</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>8e4</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>4e5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>8e5</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e6</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e6</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>4e6</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>8e6</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>1e7</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e7</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>4e7</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>1</repeat>
+                            <delta_t>2e8</delta_t>
+                        </pair>
+                        <pair>
+                            <repeat>100</repeat>
+                            <delta_t>4e8</delta_t>
+                        </pair>
+                    </timesteps>
+                </time_stepping>
+            </process>
+        </processes>
+        <output>
+            <type>VTK</type>
+            <prefix>ConstViscosityThermalConvection</prefix>
+            <timesteps>
+                <pair>
+                    <repeat> 1 </repeat>
+                    <each_steps> 149 </each_steps>
+                </pair>
+            </timesteps>
+        </output>
+    </time_loop>
+    <parameters>
+        <parameter>
+            <name>rho_solid</name>
+            <type>Constant</type>
+            <value>0.0</value>
+        </parameter>
+        <parameter>
+            <name>rho_fluid</name>
+            <type>Constant</type>
+            <value>1000</value>
+        </parameter>
+        <parameter>
+            <name>lambda_solid</name>
+            <type>Constant</type>
+            <value>3.0</value>
+        </parameter>
+        <parameter>
+            <name>lambda_fluid</name>
+            <type>Constant</type>
+            <value>0.65</value>
+        </parameter>
+        <parameter>
+            <name>alpha_l</name>
+            <type>Constant</type>
+            <value>0.0</value>
+        </parameter>
+        <parameter>
+            <name>alpha_t</name>
+            <type>Constant</type>
+            <value>0.0</value>
+        </parameter>
+        <parameter>
+            <name>c_solid</name>
+            <type>Constant</type>
+            <value>0</value>
+        </parameter>
+        <parameter>
+            <name>T0</name>
+            <type>MeshNode</type>
+            <field_name>initial_temperature</field_name>
+        </parameter>
+        <parameter>
+            <name>P0</name>
+            <type>MeshNode</type>
+            <field_name>initial_pressure</field_name>
+        </parameter>
+        <parameter>
+            <name>p_Dirichlet_topleft</name>
+            <type>Constant</type>
+            <value>0</value>
+        </parameter>
+        <parameter>
+            <name>t_Dirichlet_bottom</name>
+            <type>Constant</type>
+            <value>170</value>
+        </parameter>
+        <parameter>
+            <name>t_Dirichlet_top</name>
+            <type>Constant</type>
+            <value>20</value>
+        </parameter>
+        <parameter>
+            <name>constant_porosity_parameter</name>
+            <type>Constant</type>
+            <value>0.001</value>
+        </parameter>
+        <parameter>
+            <name>kappa1</name>
+            <type>Constant</type>
+            <values>1.e-14 0 0 1.e-14</values>
+        </parameter>
+    </parameters>
+    <process_variables>
+        <process_variable>
+            <name>T</name>
+            <components>1</components>
+            <order>1</order>
+            <initial_condition>T0</initial_condition>
+            <boundary_conditions>
+                <boundary_condition>
+                    <geometrical_set>square_1x1_geometry</geometrical_set>
+                    <geometry>bottom</geometry>
+                    <type>Dirichlet</type>
+                    <parameter>t_Dirichlet_bottom</parameter>
+                </boundary_condition>
+                <boundary_condition>
+                    <geometrical_set>square_1x1_geometry</geometrical_set>
+                    <geometry>top</geometry>
+                    <type>Dirichlet</type>
+                    <parameter>t_Dirichlet_top</parameter>
+                </boundary_condition>
+            </boundary_conditions>
+        </process_variable>
+        <process_variable>
+            <name>p</name>
+            <components>1</components>
+            <order>1</order>
+            <initial_condition>P0</initial_condition>
+            <boundary_conditions>
+                <boundary_condition>
+                    <geometrical_set>square_1x1_geometry</geometrical_set>
+                    <geometry>topleft</geometry>
+                    <type>Dirichlet</type>
+                    <parameter>p_Dirichlet_topleft</parameter>
+                </boundary_condition>
+            </boundary_conditions>
+        </process_variable>
+    </process_variables>
+    <nonlinear_solvers>
+        <nonlinear_solver>
+            <name>picard_T</name>
+            <type>Picard</type>
+            <max_iter>20</max_iter>
+            <linear_solver>linear_solver_T</linear_solver>
+        </nonlinear_solver>
+        <nonlinear_solver>
+            <name>picard_H</name>
+            <type>Picard</type>
+            <max_iter>20</max_iter>
+            <linear_solver>linear_solver_H</linear_solver>
+        </nonlinear_solver>
+    </nonlinear_solvers>
+
+    <linear_solvers>
+        <linear_solver>
+            <name>linear_solver_T</name>
+            <lis>-i bicgstab -p ilu -tol 1e-20 -maxiter 10000</lis>
+            <eigen>
+                <solver_type>BiCGSTAB</solver_type>
+                <precon_type>ILUT</precon_type>
+                <max_iteration_step>10000</max_iteration_step>
+                <error_tolerance>1e-20</error_tolerance>
+            </eigen>
+            <petsc>
+                <prefix>T</prefix>
+                <parameters>-T_ksp_type bcgs -T_pc_type bjacobi -T_ksp_rtol 1e-16 -T_ksp_max_it 10000</parameters>
+            </petsc>
+        </linear_solver>
+        <linear_solver>
+            <name>linear_solver_H</name>
+            <lis>-i bicgstab -p jacobi -tol 1e-14 -maxiter 10000</lis>
+            <eigen>
+                <solver_type>BiCGSTAB</solver_type>
+                <precon_type>DIAGONAL</precon_type>
+                <max_iteration_step>10000</max_iteration_step>
+                <error_tolerance>1e-14</error_tolerance>
+            </eigen>
+            <petsc>
+                <prefix>H</prefix>
+                <parameters>-H_ksp_type bcgs -H_pc_type mg -H_ksp_rtol 1e-12 -H_ksp_max_it 10000</parameters>
+            </petsc>
+        </linear_solver>
+    </linear_solvers>
+</OpenGeoSysProject>