diff --git a/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h
index a5297fc24187451f3e039e2d88028a92a638e883..e60f4f43761e2e6c9794c18483dee3678fc944b6 100644
--- a/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h
+++ b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h
@@ -18,16 +18,16 @@ namespace MathLib {
 
 template<typename FP_TYPE, typename IDX_TYPE> class CRSMatrixOpenMP : public CRSMatrix<FP_TYPE, IDX_TYPE> {
 public:
-	CRSMatrixOpenMP(std::string const &fname, unsigned num_of_threads) :
-			CRSMatrix<FP_TYPE, IDX_TYPE>(fname), _num_of_threads (num_of_threads)
+	CRSMatrixOpenMP(std::string const &fname) :
+			CRSMatrix<FP_TYPE, IDX_TYPE>(fname)
 	{}
 
-	CRSMatrixOpenMP(unsigned n, IDX_TYPE *iA, IDX_TYPE *jA, FP_TYPE* A, unsigned num_of_threads) :
-		CRSMatrix<FP_TYPE, IDX_TYPE>(n, iA, jA, A), _num_of_threads (num_of_threads)
+	CRSMatrixOpenMP(unsigned n, IDX_TYPE *iA, IDX_TYPE *jA, FP_TYPE* A) :
+		CRSMatrix<FP_TYPE, IDX_TYPE>(n, iA, jA, A)
 	{}
 
 	CRSMatrixOpenMP(unsigned n1) :
-		CRSMatrix<FP_TYPE, IDX_TYPE>(n1), _num_of_threads (1)
+		CRSMatrix<FP_TYPE, IDX_TYPE>(n1)
 	{}
 
 	virtual ~CRSMatrixOpenMP()
@@ -37,9 +37,6 @@ public:
 	{
 		amuxCRSParallelOpenMP(d, MatrixBase::_n_rows, CRSMatrix<FP_TYPE,IDX_TYPE>::_row_ptr, CRSMatrix<FP_TYPE,IDX_TYPE>::_col_idx, CRSMatrix<FP_TYPE,IDX_TYPE>::_data, x, y);
 	}
-
-private:
-	unsigned _num_of_threads;
 };
 
 } // end namespace MathLib
diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f773702c42ef58eac66f16ba5591560f11555339
--- /dev/null
+++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp
@@ -0,0 +1,29 @@
+/*
+ * CRSMatrixReorderedOpenMP.cpp
+ *
+ *  Created on: Jan 20, 2012
+ *      Author: TF
+ */
+
+#ifdef _OPENMP
+#include "LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h"
+#include "LinAlg/Sparse/amuxCRS.h"
+
+namespace MathLib {
+
+CRSMatrixReorderedOpenMP::CRSMatrixReorderedOpenMP(unsigned n, unsigned *iA, unsigned *jA, double* A) :
+	CRSMatrixReordered(n, iA, jA, A)
+{
+}
+
+CRSMatrixReorderedOpenMP::~CRSMatrixReorderedOpenMP()
+{}
+
+void CRSMatrixReorderedOpenMP::amux(double d, double const * const x, double *y) const
+{
+	amuxCRSParallelOpenMP(d, MatrixBase::_n_rows, CRSMatrix<double,unsigned>::_row_ptr, CRSMatrix<double,unsigned>::_col_idx, CRSMatrix<double,unsigned>::_data, x, y);
+}
+
+} // end namespace MathLib
+
+#endif // _OPENMP
diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h
new file mode 100644
index 0000000000000000000000000000000000000000..471a91980e8b4ab5d5952589f0c16fd4d28a49b7
--- /dev/null
+++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h
@@ -0,0 +1,27 @@
+/*
+ * CRSMatrixReorderedOpenMP.h
+ *
+ *  Created on: Jan 20, 2012
+ *      Author: TF
+ */
+
+#ifndef CRSMATRIXREORDEREDOPENMP_H_
+#define CRSMATRIXREORDEREDOPENMP_H_
+
+#ifdef _OPENMP
+#include "LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h"
+
+namespace MathLib {
+
+class CRSMatrixReorderedOpenMP : public CRSMatrixReordered {
+public:
+	CRSMatrixReorderedOpenMP(unsigned n, unsigned *iA, unsigned *jA, double* A);
+	virtual ~CRSMatrixReorderedOpenMP();
+
+	virtual void amux(double d, double const * const x, double *y) const;
+};
+
+}
+#endif // _OPENMP
+
+#endif /* CRSMATRIXREORDEREDOPENMP_H_ */
diff --git a/SimpleTests/MatrixTests/CMakeLists.txt b/SimpleTests/MatrixTests/CMakeLists.txt
index 693185ee6c2bce8412f7d984ffa186eff07f274b..0bbb06cdbf24b7005e1e69cc16356d9c55b2ec17 100644
--- a/SimpleTests/MatrixTests/CMakeLists.txt
+++ b/SimpleTests/MatrixTests/CMakeLists.txt
@@ -38,6 +38,18 @@ IF (METIS_FOUND)
 		MathLib
 		${METIS_LIBRARIES}
 	)
+
+	ADD_EXECUTABLE( MatVecMultNDPermOpenMP
+        	MatVecMultNDPermOpenMP.cpp
+	        ${SOURCES}
+        	${HEADERS}
+	)
+
+	TARGET_LINK_LIBRARIES ( MatVecMultNDPermOpenMP
+		Base
+		MathLib
+		${METIS_LIBRARIES}
+	)
 ENDIF(METIS_FOUND)
 
 
diff --git a/SimpleTests/MatrixTests/MatMult.cpp b/SimpleTests/MatrixTests/MatMult.cpp
index 4b7f5d4cff519a7864a5010e6e5cc33b6afc90ce..f1de76aa8af1abc2fcec7b06f6aa322699cd1671 100644
--- a/SimpleTests/MatrixTests/MatMult.cpp
+++ b/SimpleTests/MatrixTests/MatMult.cpp
@@ -58,7 +58,7 @@ int main(int argc, char *argv[])
 
 #ifdef _OPENMP
 	omp_set_num_threads(n_threads);
-	MathLib::CRSMatrixOpenMP<double, unsigned> mat (n, iA, jA, A, n_threads);
+	MathLib::CRSMatrixOpenMP<double, unsigned> mat (n, iA, jA, A);
 #else
 	MathLib::CRSMatrix<double, unsigned> mat (n, iA, jA, A);
 #endif
diff --git a/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp b/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1947d62bd2b2db1fcc35b801774720770934e44d
--- /dev/null
+++ b/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp
@@ -0,0 +1,152 @@
+/*
+ * MatVecMultNDPermOpenMP.cpp
+ *
+ *  Created on: Jan 20, 2012
+ *      Author: TF
+ */
+
+#include <cstdlib>
+
+// BaseLib
+#include "RunTimeTimer.h"
+#include "CPUTimeTimer.h"
+
+// MathLib
+#include "sparse.h"
+
+#include "LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h"
+#include "LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h"
+#include "LinAlg/Sparse/NestedDissectionPermutation/Cluster.h"
+
+int main(int argc, char *argv[])
+{
+	if (argc < 4) {
+		std::cout << "Usage: " << argv[0] << " matrix number_of_multiplications resultfile" << std::endl;
+		return 1;
+	}
+
+	// read the number of multiplication to execute
+	unsigned n_mults (0);
+	n_mults = atoi (argv[2]);
+
+	std::string fname_mat (argv[1]);
+
+	bool verbose (true);
+
+	// *** reading matrix in crs format from file
+	std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary);
+	double *A(NULL);
+	unsigned *iA(NULL), *jA(NULL), n;
+	if (in) {
+		if (verbose) {
+			std::cout << "reading matrix from " << fname_mat << " ... " << std::flush;
+		}
+		RunTimeTimer timer;
+		timer.start();
+		CS_read(in, n, iA, jA, A);
+		timer.stop();
+		if (verbose) {
+			std::cout << "ok, [wclock: " << timer.elapsed() << " s]" << std::endl;
+		}
+	} else {
+		std::cout << "error reading matrix from " << fname_mat << std::endl;
+	}
+	unsigned nnz(iA[n]);
+	if (verbose) {
+		std::cout << "Parameters read: n=" << n << ", nnz=" << nnz << std::endl;
+	}
+
+	MathLib::CRSMatrixReorderedOpenMP mat(n, iA, jA, A);
+
+	double *x(new double[n]);
+	double *y(new double[n]);
+
+	for (unsigned k(0); k<n; ++k)
+		x[k] = 1.0;
+
+	// create time measurement objects
+	RunTimeTimer run_timer;
+	CPUTimeTimer cpu_timer;
+
+	// calculate the nested dissection reordering
+	if (verbose) {
+		std::cout << "calculating nested dissection permutation of matrix ... " << std::flush;
+	}
+	run_timer.start();
+	cpu_timer.start();
+	MathLib::Cluster cluster_tree(n, iA, jA);
+	unsigned *op_perm(new unsigned[n]);
+	unsigned *po_perm(new unsigned[n]);
+	for (unsigned k(0); k<n; k++)
+		op_perm[k] = po_perm[k] = k;
+	cluster_tree.createClusterTree(op_perm, po_perm, 1000);
+	cpu_timer.stop();
+	run_timer.stop();
+	if (verbose) {
+		std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl;
+	} else {
+		if (argc == 4) {
+			std::ofstream result_os(argv[3], std::ios::app);
+			if (result_os) {
+				result_os << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << " calc nested dissection perm" << std::endl;
+			}
+			result_os.close();
+		} else {
+			std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << " calc nested dissection perm" << std::endl;
+		}
+	}
+
+
+	// applying the nested dissection reordering
+	if (verbose) {
+		std::cout << "applying nested dissection permutation to FEM matrix ... " << std::flush;
+	}
+	run_timer.start();
+	cpu_timer.start();
+	mat.reorderMatrix(op_perm, po_perm);
+	cpu_timer.stop();
+	run_timer.stop();
+	if (verbose) std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl;
+	else {
+		if (argc == 4) {
+			std::ofstream result_os(argv[3], std::ios::app);
+			if (result_os) {
+				result_os << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << " applying nested dissection perm" << std::endl;
+			}
+			result_os.close();
+		} else {
+			std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl;
+		}
+	}
+
+	if (verbose) {
+		std::cout << "matrix vector multiplication with Toms amuxCRS ... " << std::flush;
+	}
+	run_timer.start();
+	cpu_timer.start();
+	for (size_t k(0); k<n_mults; k++) {
+		mat.amux (1.0, x, y);
+	}
+	cpu_timer.stop();
+	run_timer.stop();
+
+	if (verbose) {
+		std::cout << "done [" << cpu_timer.elapsed() << " sec cpu time], [wclock: "
+				<< run_timer.elapsed() << " sec]" << std::endl;
+	} else {
+		if (argc == 4) {
+			std::ofstream result_os (argv[3], std::ios::app);
+			if (result_os) {
+				result_os << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << " " << n_mults << " MatVecMults, matrix " << fname_mat << std::endl;
+			}
+			result_os.close();
+		} else {
+			std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl;
+		}
+	}
+
+	delete [] x;
+	delete [] y;
+
+	return 0;
+}