diff --git a/AssemblerLib/GlobalSetup.h b/AssemblerLib/GlobalSetup.h
index b795d4c55e43e32a1e6f92921e395435b60a12e8..cbe19618760abf1496d78af3660a40b30d0c13c2 100644
--- a/AssemblerLib/GlobalSetup.h
+++ b/AssemblerLib/GlobalSetup.h
@@ -41,16 +41,16 @@ struct GlobalSetup
 
     template <typename... Args>
     static
-    void execute(Args&& ... args)
+    void executeDereferenced(Args&& ... args)
     {
-        return Executor::execute(std::forward<Args>(args)...);
+        return Executor::executeDereferenced(std::forward<Args>(args)...);
     }
 
     template <typename... Args>
     static
-    void executeDereferenced(Args&& ... args)
+    void executeMemberDereferenced(Args&& ... args)
     {
-        return Executor::executeDereferenced(std::forward<Args>(args)...);
+        return Executor::executeMemberDereferenced(std::forward<Args>(args)...);
     }
 
     template <typename... Args>
diff --git a/AssemblerLib/SerialExecutor.h b/AssemblerLib/SerialExecutor.h
index d3232aceac3730fe631f82e565ccd2618ab3910d..59cf55703fa5935ae0e50547df3639e5b47d638d 100644
--- a/AssemblerLib/SerialExecutor.h
+++ b/AssemblerLib/SerialExecutor.h
@@ -18,48 +18,57 @@ namespace AssemblerLib
 
 struct SerialExecutor
 {
-    /// Executes a \c f for each element from the input container.
+    /// Executes \c f for each element from the input container.
+    ///
+    /// The elements of \c c are dereferenced before being passed to \c f.
     /// Return values of the function call are ignored.
     ///
-    /// \tparam F   \c f type.
-    /// \tparam C   input container type.
+    /// \note
+    /// This function is intended to be used if \c f is a plain function,
+    /// a static class method, a lambda or an object having a call operator
+    /// defined. For non-static member functions you are encouraged to use
+    /// executeMemberDereferenced(), if applicable.
+    ///
+    /// \tparam F    type of the callback function \c f.
+    /// \tparam C    input container type.
+    /// \tparam Args types additional arguments passed to \c f.
     ///
-    /// \param f    a function that accepts a pointer to container's elements and
-    ///             an index as arguments.
+    /// \param f    a function that accepts a a reference to container's elements,
+    ///             an index as arguments (and possibly further arguments).
     /// \param c    a container supporting access over operator[].
-    /// \param args additional arguments passed to \c f
+    ///             The elements of \c c must have pointer semantics, i.e.,
+    ///             support dereferencing via unary operator*().
+    /// \param args additional arguments passed to \c f.
+    ///
     template <typename F, typename C, typename ...Args_>
     static
     void
 #if defined(_MSC_VER) && (_MSC_VER >= 1700)
-    execute(F& f, C const& c, Args_&&... args)
+    executeDereferenced(F& f, C const& c, Args_&&... args)
 #else
-    execute(F const& f, C const& c, Args_&&... args)
+    executeDereferenced(F const& f, C const& c, Args_&&... args)
 #endif
     {
         for (std::size_t i = 0; i < c.size(); i++)
-            f(i, c[i], std::forward<Args_>(args)...);
+            f(i, *c[i], std::forward<Args_>(args)...);
     }
 
-    /// Executes a \c f for each element from the input container.
+    /// Executes the given \c method of the given \c object for each element
+    /// from the input \c container.
     ///
-    /// Return values of the function call are ignored.
-    /// This method is very similar to execute() the only difference
-    /// between the two is that here the elements of \c c are dereferenced
-    /// before being passed to the function \c f.
+    /// This method is very similar to executeDereferenced().
     ///
-    /// \see execute()
-    template <typename F, typename C, typename ...Args_>
-    static
-    void
-#if defined(_MSC_VER) && (_MSC_VER >= 1700)
-    executeDereferenced(F& f, C const& c, Args_&&... args)
-#else
-    executeDereferenced(F const& f, C const& c, Args_&&... args)
-#endif
+    /// \see executeDereferenced()
+    template <typename Container, typename Object, typename... MethodArgs, typename... Args>
+    static void
+    executeMemberDereferenced(
+            Object& object,
+            void (Object::*method)(MethodArgs... method_args) const,
+            Container const& container, Args&&... args)
     {
-        for (std::size_t i = 0; i < c.size(); i++)
-            f(i, *c[i], std::forward<Args_>(args)...);
+        for (std::size_t i = 0; i < container.size(); i++) {
+            (object.*method)(i, *container[i], std::forward<Args>(args)...);
+        }
     }
 
     /// Same as execute(f, c), but with two containers, where the second one is
diff --git a/AssemblerLib/VectorMatrixAssembler.h b/AssemblerLib/VectorMatrixAssembler.h
index 1ca086cfc675b034f4bbe3caea2c5797384a310d..8630f76fd18b69bcfb2b9d8fe70bed9752095306 100644
--- a/AssemblerLib/VectorMatrixAssembler.h
+++ b/AssemblerLib/VectorMatrixAssembler.h
@@ -70,13 +70,14 @@ namespace AssemblerLib
  * VectorMatrixAssembler type.
  */
 template<typename GlobalMatrix, typename GlobalVector,
+         typename LocalAssembler,
          NumLib::ODESystemTag ODETag>
 class VectorMatrixAssembler;
 
 
 //! Specialization for first-order implicit quasi-linear systems.
-template<typename GlobalMatrix, typename GlobalVector>
-class VectorMatrixAssembler<GlobalMatrix, GlobalVector,
+template<typename GlobalMatrix, typename GlobalVector, typename LocalAssembler>
+class VectorMatrixAssembler<GlobalMatrix, GlobalVector, LocalAssembler,
         NumLib::ODESystemTag::FirstOrderImplicitQuasilinear> final
 {
 public:
@@ -90,8 +91,7 @@ public:
     /// The positions in the global matrix/vector are taken from
     /// the LocalToGlobalIndexMap provided in the constructor at index \c id.
     /// \attention The index \c id is not necessarily the mesh item's id.
-    template <typename LocalAssembler>
-    void operator()(std::size_t const id,
+    void assemble(std::size_t const id,
         LocalAssembler& local_assembler,
         const double t, GlobalVector const& x,
         GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b) const
@@ -112,7 +112,6 @@ public:
     /// Executes assembleJacobian() of the local assembler for the
     /// given mesh item passing the mesh item's nodal d.o.f.
     /// \attention The index \c id is not necessarily the mesh item's id.
-    template <typename LocalAssembler>
     void assembleJacobian(std::size_t const id,
                           LocalAssembler& local_assembler,
                           const double t,
@@ -135,7 +134,6 @@ public:
     /// Executes the preTimestep() method of the local assembler for the
     /// given mesh item passing the mesh item's nodal d.o.f.
     /// \attention The index \c id is not necessarily the mesh item's id.
-    template <typename LocalAssembler>
     void preTimestep(std::size_t const id,
                      LocalAssembler& local_assembler,
                      GlobalVector const& x) const
@@ -153,7 +151,6 @@ public:
     /// Executes the postTimestep() method of the local assembler for the
     /// given mesh item passing the mesh item's nodal d.o.f.
     /// \attention The index \c id is not necessarily the mesh item's id.
-    template <typename LocalAssembler>
     void postTimestep(std::size_t const id,
                       LocalAssembler& local_assembler,
                       GlobalVector const& x) const
@@ -184,8 +181,8 @@ private:
 
 
 //! Specialization used to add Neumann boundary conditions.
-template<typename GlobalMatrix, typename GlobalVector>
-class VectorMatrixAssembler<GlobalMatrix, GlobalVector,
+template<typename GlobalMatrix, typename GlobalVector, typename LocalAssembler>
+class VectorMatrixAssembler<GlobalMatrix, GlobalVector, LocalAssembler,
         NumLib::ODESystemTag::NeumannBC> final
 {
 public:
@@ -199,8 +196,7 @@ public:
     /// The positions in the global matrix/vector are taken from
     /// the LocalToGlobalIndexMap provided in the constructor at index \c id.
     /// \attention The index \c id is not necessarily the mesh item's id.
-    template <typename LocalAssembler>
-    void operator()(std::size_t const id,
+    void assemble(std::size_t const id,
         LocalAssembler& local_assembler,
         const double t, GlobalVector& b) const
     {
diff --git a/ProcessLib/GroundwaterFlowProcess.h b/ProcessLib/GroundwaterFlowProcess.h
index 13de202f6189d36daa92b1083ddb733dda5fb700..91a7fc14433f15a584a9770b4aba4680edec8667 100644
--- a/ProcessLib/GroundwaterFlowProcess.h
+++ b/ProcessLib/GroundwaterFlowProcess.h
@@ -69,11 +69,13 @@ public:
     }
 
     template <unsigned GlobalDim>
-    void createLocalAssemblers()
+    void createLocalAssemblers(AssemblerLib::LocalToGlobalIndexMap const& dof_table,
+                               MeshLib::Mesh const& mesh,
+                               unsigned const integration_order)
     {
         DBUG("Create local assemblers.");
         // Populate the vector of local assemblers.
-        _local_assemblers.resize(this->_mesh.getNElements());
+        _local_assemblers.resize(mesh.getNElements());
         // Shape matrices initializer
         using LocalDataInitializer = AssemblerLib::LocalDataInitializer<
             GroundwaterFlow::LocalAssemblerDataInterface,
@@ -90,26 +92,31 @@ public:
                 MeshLib::Element,
                 LocalDataInitializer>;
 
-        LocalAssemblerBuilder local_asm_builder(
-            initializer, *this->_local_to_global_index_map);
+        LocalAssemblerBuilder local_asm_builder(initializer, dof_table);
 
         DBUG("Calling local assembler builder for all mesh elements.");
         this->_global_setup.transform(
                 local_asm_builder,
-                this->_mesh.getElements(),
+                mesh.getElements(),
                 _local_assemblers,
-                this->_integration_order,
+                integration_order,
                 _hydraulic_conductivity);
     }
 
-    void createLocalAssemblers() override
+    void createAssemblers(AssemblerLib::LocalToGlobalIndexMap const& dof_table,
+                          MeshLib::Mesh const& mesh,
+                          unsigned const integration_order) override
     {
-        if (this->_mesh.getDimension()==1)
-            createLocalAssemblers<1>();
-        else if (this->_mesh.getDimension()==2)
-            createLocalAssemblers<2>();
-        else if (this->_mesh.getDimension()==3)
-            createLocalAssemblers<3>();
+        DBUG("Create global assembler.");
+        _global_assembler.reset(new GlobalAssembler(dof_table));
+
+        auto const dim = mesh.getDimension();
+        if (dim==1)
+            createLocalAssemblers<1>(dof_table, mesh, integration_order);
+        else if (dim==2)
+            createLocalAssemblers<2>(dof_table, mesh, integration_order);
+        else if (dim==3)
+            createLocalAssemblers<3>(dof_table, mesh, integration_order);
         else
             assert(false);
     }
@@ -130,6 +137,10 @@ private:
     using LocalAssembler = GroundwaterFlow::LocalAssemblerDataInterface<
         typename GlobalSetup::MatrixType, typename GlobalSetup::VectorType>;
 
+    using GlobalAssembler = AssemblerLib::VectorMatrixAssembler<
+            GlobalMatrix, GlobalVector, LocalAssembler,
+            NumLib::ODESystemTag::FirstOrderImplicitQuasilinear>;
+
 
     void assembleConcreteProcess(const double t, GlobalVector const& x,
                                  GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b) override
@@ -139,10 +150,12 @@ private:
         DBUG("Assemble GroundwaterFlowProcess.");
 
         // Call global assembler for each local assembly item.
-        this->_global_setup.executeDereferenced(
-            *this->_global_assembler, _local_assemblers, t, x, M, K, b);
+        this->_global_setup.executeMemberDereferenced(
+            *_global_assembler, &GlobalAssembler::assemble,
+            _local_assemblers, t, x, M, K, b);
     }
 
+    std::unique_ptr<GlobalAssembler> _global_assembler;
     std::vector<std::unique_ptr<LocalAssembler>> _local_assemblers;
 };
 
diff --git a/ProcessLib/NeumannBc.h b/ProcessLib/NeumannBc.h
index db6b9f83531ed7bbb8a2788d4a159438a4109bb8..ba42a3bdf9894111bba88dcde106d7f9cb22572e 100644
--- a/ProcessLib/NeumannBc.h
+++ b/ProcessLib/NeumannBc.h
@@ -101,7 +101,9 @@ public:
     void integrate(GlobalSetup const& global_setup,
                    const double t, GlobalVector& b)
     {
-        global_setup.executeDereferenced(*_global_assembler, _local_assemblers, t, b);
+        global_setup.executeMemberDereferenced(
+                    *_global_assembler, &GlobalAssembler::assemble,
+                    _local_assemblers, t, b);
     }
 
     void initialize(GlobalSetup const& global_setup,
@@ -180,16 +182,16 @@ private:
     /// the #_function.
     unsigned const _integration_order;
 
+    using LocalAssembler = LocalNeumannBcAsmDataInterface<
+        GlobalMatrix, GlobalVector>;
+
     using GlobalAssembler =
         AssemblerLib::VectorMatrixAssembler<
-            GlobalMatrix, GlobalVector,
+            GlobalMatrix, GlobalVector, LocalAssembler,
             NumLib::ODESystemTag::NeumannBC>;
 
     std::unique_ptr<GlobalAssembler> _global_assembler;
 
-    using LocalAssembler = LocalNeumannBcAsmDataInterface<
-        GlobalMatrix, GlobalVector>;
-
     /// Local assemblers for each element of #_elements.
     std::vector<std::unique_ptr<LocalAssembler>> _local_assemblers;
 
diff --git a/ProcessLib/Process.h b/ProcessLib/Process.h
index b845ce1a4da1d8b03925e5a1eb425d287dc877a0..9ea4a3cf6d9cf344877837fd2372edd363d44fb9 100644
--- a/ProcessLib/Process.h
+++ b/ProcessLib/Process.h
@@ -60,8 +60,9 @@ public:
 	Process(MeshLib::Mesh& mesh,
 	        NonlinearSolver& nonlinear_solver,
 	        std::unique_ptr<TimeDiscretization>&& time_discretization)
-	    : _mesh(mesh), _nonlinear_solver(nonlinear_solver)
+	    : _nonlinear_solver(nonlinear_solver)
 	    , _time_discretization(std::move(time_discretization))
+	    , _mesh(mesh)
 	{}
 
 	virtual ~Process()
@@ -69,9 +70,6 @@ public:
 		delete _mesh_subset_all_nodes;
 	}
 
-	/// Process specific initialization called by initialize().
-	virtual void createLocalAssemblers() = 0;
-
 	/// Preprocessing before starting assembly for new timestep.
 	virtual void preTimestep(GlobalVector const& /*x*/,
 	                         const double /*t*/, const double /*delta_t*/) {}
@@ -141,11 +139,7 @@ public:
 		computeSparsityPattern();
 #endif
 
-		DBUG("Create global assembler.");
-		_global_assembler.reset(
-		    new GlobalAssembler(*_local_to_global_index_map));
-
-		createLocalAssemblers();
+		createAssemblers(*_local_to_global_index_map, _mesh, _integration_order);
 
 		DBUG("Initialize boundary conditions.");
 		for (ProcessVariable& pv : _process_variables)
@@ -230,6 +224,12 @@ public:
 	}
 
 private:
+	/// Process specific initialization called by initialize().
+	virtual void createAssemblers(
+	    AssemblerLib::LocalToGlobalIndexMap const& dof_table,
+	    MeshLib::Mesh const& mesh,
+	    unsigned const integration_order) = 0;
+
 	virtual void assembleConcreteProcess(
 	    const double t, GlobalVector const& x,
 	    GlobalMatrix& M, GlobalMatrix& K, GlobalVector& b) = 0;
@@ -345,23 +345,10 @@ private:
 	}
 
 protected:
-	unsigned const _integration_order = 2;
-
-	MeshLib::Mesh& _mesh;
 	MeshLib::MeshSubset const* _mesh_subset_all_nodes = nullptr;
 
 	GlobalSetup _global_setup;
 
-	using GlobalAssembler = AssemblerLib::VectorMatrixAssembler<
-	        GlobalMatrix,
-	        GlobalVector,
-	        NumLib::ODESystemTag::FirstOrderImplicitQuasilinear>;
-
-	std::unique_ptr<GlobalAssembler> _global_assembler;
-
-	std::unique_ptr<AssemblerLib::LocalToGlobalIndexMap>
-	    _local_to_global_index_map;
-
 	AssemblerLib::SparsityPattern _sparsity_pattern;
 
 	std::vector<DirichletBc<GlobalIndexType>> _dirichlet_bcs;
@@ -372,6 +359,12 @@ protected:
 
 	NonlinearSolver& _nonlinear_solver;
 	std::unique_ptr<TimeDiscretization> _time_discretization;
+
+private:
+	MeshLib::Mesh& _mesh;
+	std::unique_ptr<AssemblerLib::LocalToGlobalIndexMap>
+	    _local_to_global_index_map;
+	unsigned const _integration_order = 2;
 };
 
 /// Find a process variable for a name given in the process configuration under
diff --git a/Tests/AssemblerLib/TestSerialExecutor.cpp b/Tests/AssemblerLib/TestSerialExecutor.cpp
index 056ac2e2870ed5ea6782dae9bf95efced8c661d3..0f47f417ce9cfa52144550ba6ff6d1fff5ac2e8c 100644
--- a/Tests/AssemblerLib/TestSerialExecutor.cpp
+++ b/Tests/AssemblerLib/TestSerialExecutor.cpp
@@ -15,59 +15,86 @@
 #include <numeric>
 #include "AssemblerLib/SerialExecutor.h"
 
-template <typename Container>
+template <typename ContainerElement_>
 class AssemblerLibSerialExecutor : public ::testing::Test
 {
-    public:
-    AssemblerLibSerialExecutor()
+public:
+    using ContainerElement = ContainerElement_;
+    using Container = std::vector<ContainerElement>;
+    using PtrContainer = std::vector<ContainerElement*>;
+
+    template<typename Callback>
+    void test(Callback const& cb)
     {
-        reference.resize(size);
+        Container reference(size);
         std::iota(reference.begin(), reference.end(), 0);
 
-        std::copy(reference.begin(), reference.end(),
-            std::back_inserter(container));
+        Container container_back(reference);
+
+        PtrContainer container;
+        container.reserve(size);
+        for (auto& el : container_back) container.push_back(&el);
+
+        cb(container, reference);
+
+        ASSERT_TRUE(referenceIsZero(reference));
     }
 
-    void
-    subtractFromReference(int const value, std::size_t const index)
+    static void subtractFromReferenceStatic(
+            ContainerElement const value, std::size_t const index,
+            Container& reference)
     {
         reference[index] -= value;
     }
 
-    bool
-    referenceIsZero() const
+    void subtractFromReference(
+            ContainerElement const value, std::size_t const index,
+            Container& reference) const
+    {
+        reference[index] -= value;
+    }
+
+    static bool
+    referenceIsZero(Container const& reference)
     {
         return std::all_of(reference.begin(), reference.end(),
-            [](int const reference_value)
+            [](ContainerElement const reference_value)
             {
                 return reference_value == 0;
             });
     }
 
     static std::size_t const size = 100;
-    std::vector<int> reference;
-    Container container;
 };
 
-TYPED_TEST_CASE_P(AssemblerLibSerialExecutor);
+typedef ::testing::Types<int> TestCases;
 
-using namespace std::placeholders;
+TYPED_TEST_CASE(AssemblerLibSerialExecutor, TestCases);
 
-TYPED_TEST_P(AssemblerLibSerialExecutor, ContainerArgument)
+TYPED_TEST(AssemblerLibSerialExecutor, ContainerArgument)
 {
-    AssemblerLib::SerialExecutor::execute(
-        std::bind(&TestFixture::subtractFromReference, this, _1, _2),
-        this->container);
-    ASSERT_TRUE(this->referenceIsZero());
-}
+    using Elem         = typename TestFixture::ContainerElement;
+    using Container    = typename TestFixture::Container;
+    using PtrContainer = typename TestFixture::PtrContainer;
 
-REGISTER_TYPED_TEST_CASE_P(AssemblerLibSerialExecutor,
-    ContainerArgument);
+    TestFixture::test(
+        [](PtrContainer const& ctnr, Container& ref) {
+            auto cb_static =
+                [](Elem const value, std::size_t const index, Container& ref_inner) {
+                    TestFixture::subtractFromReferenceStatic(value, index, ref_inner);
+                };
 
+            AssemblerLib::SerialExecutor::executeDereferenced(
+                cb_static, ctnr, ref);
+        }
+    );
 
-typedef ::testing::Types <
-      std::vector<int>
-    > TestTypes;
-
-INSTANTIATE_TYPED_TEST_CASE_P(templated, AssemblerLibSerialExecutor,
-    TestTypes);
+    TestFixture::test(
+        [this](PtrContainer const& ctnr, Container& ref) {
+            AssemblerLib::SerialExecutor::executeMemberDereferenced(
+                *static_cast<TestFixture*>(this), &TestFixture::subtractFromReference,
+                ctnr, ref
+            );
+        }
+    );
+}
diff --git a/Tests/AssemblerLib/TestSerialLinearSolver.cpp b/Tests/AssemblerLib/TestSerialLinearSolver.cpp
index 9e63e9507690239359e567c28300c65f6d4ff095..5ab7cbee819ece25b3ce3b8c8b70ac6d204c0824 100644
--- a/Tests/AssemblerLib/TestSerialLinearSolver.cpp
+++ b/Tests/AssemblerLib/TestSerialLinearSolver.cpp
@@ -78,9 +78,9 @@ TEST(AssemblerLibSerialLinearSolver, Steady2DdiffusionQuadElem)
                globalSetup.createVector(local_to_global_index_map.dofSize())};
     // TODO no setZero() for rhs, x?
 
+    using LocalAssembler = Example::LocalAssemblerData<GlobalMatrix, GlobalVector>;
     // Initializer of the local assembler data.
-    std::vector<Example::LocalAssemblerData<
-        GlobalMatrix, GlobalVector>*> local_assembler_data;
+    std::vector<LocalAssembler*> local_assembler_data;
     local_assembler_data.resize(ex1.msh->getNElements());
 
     typedef AssemblerLib::LocalAssemblerBuilder<
@@ -106,7 +106,7 @@ TEST(AssemblerLibSerialLinearSolver, Steady2DdiffusionQuadElem)
     // TODO in the future use simpler NumLib::ODESystemTag
     // Local and global assemblers.
     typedef AssemblerLib::VectorMatrixAssembler<
-            GlobalMatrix, GlobalVector,
+            GlobalMatrix, GlobalVector, LocalAssembler,
             NumLib::ODESystemTag::FirstOrderImplicitQuasilinear> GlobalAssembler;
 
     GlobalAssembler assembler(local_to_global_index_map);
@@ -116,8 +116,9 @@ TEST(AssemblerLibSerialLinearSolver, Steady2DdiffusionQuadElem)
         globalSetup.createMatrix(local_to_global_index_map.dofSize())};
     A->setZero();
     auto const t = 0.0;
-    globalSetup.executeDereferenced(
-        assembler, local_assembler_data, t, *x, *M_dummy, *A, *rhs);
+    globalSetup.executeMemberDereferenced(
+                assembler, &GlobalAssembler::assemble,
+                local_assembler_data, t, *x, *M_dummy, *A, *rhs);
 
     //std::cout << "A=\n";
     //A->write(std::cout);