diff --git a/Base/quicksort.h b/Base/quicksort.h
index 78f69f558560b2bbd54d4b31e202508b929e9982..72648f2ebb66dcca98d1c6e934be496c09990aa8 100644
--- a/Base/quicksort.h
+++ b/Base/quicksort.h
@@ -43,16 +43,23 @@ unsigned partition_(T* array, unsigned beg, unsigned end)
   return j;
 }
 
-
 /**
- * version of partition_ that additional updates the permutation vector
- * */
-template <class T>
-size_t partition_(T* array, size_t beg, size_t end, size_t *perm)
+ * Permutes the entries of a part of an array such that all entries that are smaller
+ * than a certain value are at the beginning of the array and all entries that are
+ * bigger are at the end of the array. This version of partition_ permutes a second
+ * array second_array according to the sorting.
+ * @param array array to sort
+ * @param beg beginning index in array for sorting
+ * @param end end-1 is the last index in array for sorting
+ * @param second_array the second array is permuted according to the sort process of array
+ * @return
+ */
+template <typename T1, typename T2>
+size_t partition_(T1* array, size_t beg, size_t end, T2 *second_array)
 {
 	size_t i = beg + 1;
 	size_t j = end - 1;
-	T m = array[beg];
+	T1 m = array[beg];
 
 	for (;;) {
 		while ((i < end) && (array[i] <= m))
@@ -63,24 +70,29 @@ size_t partition_(T* array, size_t beg, size_t end, size_t *perm)
 		if (i >= j)
 			break;
 		BASELIB::swap(array[i], array[j]);
-		BASELIB::swap(perm[i], perm[j]);
+		BASELIB::swap(second_array[i], second_array[j]);
 	}
 
 	BASELIB::swap(array[beg], array[j]);
-	BASELIB::swap(perm[beg], perm[j]);
+	BASELIB::swap(second_array[beg], second_array[j]);
 	return j;
 }
 
 /**
- * version of quickSort that stores the permutation
- * */
-template <class T>
-void quicksort(T* array, size_t beg, size_t end, size_t* perm)
+ * version of quickSort that permutes the entries of a second array
+ * according to the permutation of the first array
+ * @param array array to sort
+ * @param beg beginning index in array for sorting
+ * @param end end-1 is the last index in array for sorting
+ * @param second_array the second array is permuted according to the sort process of array
+ */
+template <typename T1, typename T2>
+void quicksort(T1* array, size_t beg, size_t end, T2* second_array)
 {
 	if (beg < end) {
-		size_t p = partition_(array, beg, end, perm);
-		quicksort(array, beg, p, perm);
-		quicksort(array, p+1, end, perm);
+		size_t p = partition_(array, beg, end, second_array);
+		quicksort(array, beg, p, second_array);
+		quicksort(array, p+1, end, second_array);
 	}
 }
 
diff --git a/MathLib/CMakeLists.txt b/MathLib/CMakeLists.txt
index cc426573501a7564ecb86a9465baaecbe0c9ac64..2de9b81fed758fc0b147a1544326498d6c52fe49 100644
--- a/MathLib/CMakeLists.txt
+++ b/MathLib/CMakeLists.txt
@@ -47,12 +47,20 @@ FILE(GLOB_RECURSE HEADERS . *.h)
 #	LinAlg/Preconditioner/generateDiagPrecond.cpp
 #)
 
+FIND_PACKAGE( Metis )
+IF (METIS_FOUND)
+	MESSAGE (STATUS "metis found.")
+ELSE(METIS_FOUND)
+	MESSAGE (STATUS "metis not found.")
+ENDIF(METIS_FOUND)
+
 FILE(GLOB_RECURSE SOURCES . *.cpp)
 
 INCLUDE_DIRECTORIES (
 	.
 	../Base
 	../GeoLib
+	${METIS_INCLUDE_DIR}
 )
 
 # Create the library
diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h
index 91c8ad7c56bba097e01fe234449341d810bca084..2b1c21ccde03f88e12c4fbe3d290725ee065265d 100644
--- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h
+++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h
@@ -39,7 +39,7 @@ public:
 	{}
 
 	/**
-	 * The
+	 *
 	 */
 	void makeSymmetric();
 
@@ -57,7 +57,7 @@ public:
 private:
 	/**
 	 * copy constructor
-	 * @param src another AdjMat with data for initializiation
+	 * @param src another AdjMat with data for initialization
 	 */
 	AdjMat(const AdjMat&);
 
diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c2ff934e95d682dc181d0ff845d3af70a903d1f
--- /dev/null
+++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.cpp
@@ -0,0 +1,71 @@
+/*
+ * CRSMatrixReordered.cpp
+ *
+ *  Created on: Jan 3, 2012
+ *      Author: TF
+ */
+
+// BaseLib
+#include "quicksort.h"
+
+#include "LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h"
+
+namespace MathLib {
+
+CRSMatrixReordered::CRSMatrixReordered(std::string const &fname) :
+	CRSMatrix<double,unsigned>(fname)
+{}
+
+CRSMatrixReordered::CRSMatrixReordered(unsigned n, unsigned *iA, unsigned *jA, double* A) :
+	CRSMatrix<double, unsigned> (n, iA, jA, A)
+{}
+
+CRSMatrixReordered::~CRSMatrixReordered()
+{}
+
+void CRSMatrixReordered::reorderMatrix(unsigned const*const op_perm, unsigned const*const po_perm)
+{
+	unsigned i; // row and col idx in permuted matrix
+	unsigned j, idx; // pointer in jA
+
+	const unsigned size(getNRows());
+
+	unsigned *pos(new unsigned[size + 1]);
+	for (i = 0; i < size; i++) {
+		const unsigned original_row(op_perm[i]);
+		pos[i] = _row_ptr[original_row+1] - _row_ptr[original_row];
+	}
+	pos[size] = 0;
+
+	unsigned *iAn(new unsigned[size + 1]);
+	iAn[0] = 0;
+	for (i = 0; i < size; i++)
+		iAn[i + 1] = iAn[i] + pos[i];
+	for (i = 0; i < size; i++)
+		pos[i] = iAn[i];
+
+	unsigned *jAn(new unsigned[iAn[size]]);
+	double *An(new double[iAn[size]]);
+	for (i = 0; i < size; i++) {
+		const unsigned original_row(op_perm[i]);
+		idx = _row_ptr[original_row+1];
+		for (j = _row_ptr[original_row]; j < idx; j++) {
+			jAn[pos[i]] = po_perm[_col_idx[j]];
+			An[pos[i]++] = _data[j];
+		}
+	}
+
+	delete[] pos;
+	for (i = 0; i < size; ++i)
+		quicksort(jAn, static_cast<size_t>(iAn[i]), static_cast<size_t>(iAn[i + 1]), An);
+
+	BASELIB::swap(iAn, _row_ptr);
+	BASELIB::swap(jAn, _col_idx);
+	BASELIB::swap(An, _data);
+
+	delete [] iAn;
+	delete [] jAn;
+	delete [] An;
+}
+
+} // end namespace MathLib
diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h
new file mode 100644
index 0000000000000000000000000000000000000000..a7d372ff033568ab830509c3c5bd9e550b173916
--- /dev/null
+++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h
@@ -0,0 +1,26 @@
+/*
+ * CRSMatrixReordered.h
+ *
+ *  Created on: Jan 3, 2012
+ *      Author: TF
+ */
+
+#ifndef CRSMATRIXREORDERED_H_
+#define CRSMATRIXREORDERED_H_
+
+#include "LinAlg/Sparse/CRSMatrix.h"
+
+namespace MathLib {
+
+class CRSMatrixReordered: public MathLib::CRSMatrix<double,unsigned>
+{
+public:
+	CRSMatrixReordered(std::string const &fname);
+	CRSMatrixReordered(unsigned n, unsigned *iA, unsigned *jA, double* A);
+	virtual ~CRSMatrixReordered();
+	void reorderMatrix(unsigned const*const op_perm, unsigned const*const po_perm);
+};
+
+}
+
+#endif /* CRSMATRIXREORDERED_H_ */
diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.cpp
index dadbb3920e1a13c44967a81d09896f24920de1ad..05d1eb93fbb80f4776a77a679bf49849b839ac65 100644
--- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.cpp
+++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.cpp
@@ -5,6 +5,8 @@
  *      Author: TF
  */
 
+#include "metis.h"
+
 // BaseLib
 #include "swap.h"
 
@@ -14,13 +16,14 @@
 #include "Separator.h"
 #include "AdjMat.h"
 
+// METIS function
+//extern "C" void METIS_ComputeVertexSeparator(unsigned*, unsigned*, unsigned*,
+//					   unsigned*, unsigned*, unsigned*, unsigned*);
 
-namespace MathLib {
+//extern "C" void METIS_NodeND(unsigned*, unsigned*, unsigned*,
+//				   unsigned*, unsigned*, unsigned*, unsigned*);
 
-// METIS function
-extern "C" void METIS_NodeComputeSeparator(unsigned*, unsigned*, unsigned*,
-					   unsigned*, unsigned*, unsigned*,
-					   unsigned*, unsigned*);
+namespace MathLib {
 
 Cluster::Cluster (unsigned n, unsigned* iA, unsigned* jA)
   : ClusterBase (n, iA, jA)
@@ -42,72 +45,81 @@ void Cluster::subdivide(unsigned bmin)
 		unsigned *xadj(const_cast<unsigned*>(_l_adj_mat->getRowPtrArray()));
 		unsigned *adjncy(const_cast<unsigned*>(_l_adj_mat->getColIdxArray()));
 //		unsigned nparts = 2;
-		unsigned options = 0; // for METIS
-		unsigned sepsize(0); // for METIS
+		unsigned options[METIS_NOPTIONS]; // for METIS
+		METIS_SetDefaultOptions(options);
+//		options[METIS OPTION PTYPE] = METIS PTYPE RB;
+//		options[METIS OPTION OBJTYPE] = METIS OBJTYPE CUT;
+//		options[METIS OPTION CTYPE] = METIS CTYPE SHEM;
+//		options[] = ;
+//		options[] = ;
+//		options[] = ;
+
+//		unsigned sepsize(0); // for METIS
 		unsigned *vwgt(new unsigned[n_rows + 1]);
-		const unsigned nnz(xadj[n_rows]);
-		unsigned *adjwgt(new unsigned[nnz]);
+//		const unsigned nnz(xadj[n_rows]);
+//		unsigned *adjwgt(new unsigned[nnz]);
 		for (unsigned k(0); k < n_rows + 1; k++)
 			vwgt[k] = 1;
-		for (unsigned k(0); k < nnz; k++)
-			adjwgt[k] = 1;
-		unsigned *part(new unsigned[n_rows + 1]);
+//		for (unsigned k(0); k < nnz; k++)
+//			adjwgt[k] = 1;
+//		unsigned *part(new unsigned[n_rows + 1]);
 
 		// subdivide the index set into three parts employing METIS
-		METIS_NodeComputeSeparator(&n_rows, xadj, adjncy, vwgt, adjwgt, &options,
-				&sepsize, part);
-
-		// create and init local permutations
-		unsigned *l_op_perm(new unsigned[size]);
-		unsigned *l_po_perm(new unsigned[size]);
-		for (unsigned i = 0; i < size; ++i)
-			l_op_perm[i] = l_po_perm[i] = i;
-
-		unsigned isep1, isep2;
-		updatePerm(part, isep1, isep2, l_op_perm, l_po_perm);
-		delete[] part;
-
-		// update global permutation
-		unsigned *t_op_perm = new unsigned[size];
-		for (unsigned k = 0; k < size; ++k)
-			t_op_perm[k] = _g_op_perm[_beg + l_op_perm[k]];
-
-		for (unsigned k = _beg; k < _end; ++k) {
-			_g_op_perm[k] = t_op_perm[k - _beg];
-			_g_po_perm[_g_op_perm[k]] = k;
-		}
-		delete[] t_op_perm;
-
-		// next recursion step
-		if ((isep1 >= bmin) && (isep2 - isep1 >= bmin)) {
-			// construct adj matrices for [0, isep1), [isep1,isep2), [isep2, _end)
-			AdjMat *l_adj0(_l_adj_mat->getMat(0, isep1, l_op_perm, l_po_perm));
-			AdjMat *l_adj1(_l_adj_mat->getMat(isep1, isep2, l_op_perm, l_po_perm));
-			AdjMat *l_adj2(_l_adj_mat->getMat(isep2, size, l_op_perm, l_po_perm));
-
-			delete[] l_op_perm;
-			delete[] l_po_perm;
-			delete _l_adj_mat;
-			_l_adj_mat = NULL;
-
-			_n_sons = 3;
-			_sons = new ClusterBase*[_n_sons];
-
-			isep1 += _beg;
-			isep2 += _beg;
-
-			// constructing child nodes for index cluster tree
-			_sons[0] = new Cluster(this, _beg, isep1, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj0);
-			_sons[1] = new Cluster(this, isep1, isep2, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj1);
-			_sons[2] = new Separator(this, isep2, _end, _g_op_perm,	_g_po_perm, _g_adj_mat, l_adj2);
-
-			dynamic_cast<Cluster*>(_sons[0])->subdivide(bmin);
-			dynamic_cast<Cluster*>(_sons[1])->subdivide(bmin);
-
-		} else {
-			delete _l_adj_mat;
-			_l_adj_mat = NULL;
-		} // end if next recursion step
+//		METIS_ComputeVertexSeparator(&n_rows, xadj, adjncy, vwgt, &options,
+//				&sepsize, part);
+		METIS_NodeND(&n_rows, xadj, adjncy, vwgt, options, _g_op_perm, _g_po_perm);
+
+//		// create and init local permutations
+//		unsigned *l_op_perm(new unsigned[size]);
+//		unsigned *l_po_perm(new unsigned[size]);
+//		for (unsigned i = 0; i < size; ++i)
+//			l_op_perm[i] = l_po_perm[i] = i;
+//
+//		unsigned isep1, isep2;
+//		updatePerm(part, isep1, isep2, l_op_perm, l_po_perm);
+//		delete[] part;
+//
+//		// update global permutation
+//		unsigned *t_op_perm = new unsigned[size];
+//		for (unsigned k = 0; k < size; ++k)
+//			t_op_perm[k] = _g_op_perm[_beg + l_op_perm[k]];
+//
+//		for (unsigned k = _beg; k < _end; ++k) {
+//			_g_op_perm[k] = t_op_perm[k - _beg];
+//			_g_po_perm[_g_op_perm[k]] = k;
+//		}
+//		delete[] t_op_perm;
+//
+//		// next recursion step
+//		if ((isep1 >= bmin) && (isep2 - isep1 >= bmin)) {
+//			// construct adj matrices for [0, isep1), [isep1,isep2), [isep2, _end)
+//			AdjMat *l_adj0(_l_adj_mat->getMat(0, isep1, l_op_perm, l_po_perm));
+//			AdjMat *l_adj1(_l_adj_mat->getMat(isep1, isep2, l_op_perm, l_po_perm));
+//			AdjMat *l_adj2(_l_adj_mat->getMat(isep2, size, l_op_perm, l_po_perm));
+//
+//			delete[] l_op_perm;
+//			delete[] l_po_perm;
+//			delete _l_adj_mat;
+//			_l_adj_mat = NULL;
+//
+//			_n_sons = 3;
+//			_sons = new ClusterBase*[_n_sons];
+//
+//			isep1 += _beg;
+//			isep2 += _beg;
+//
+//			// constructing child nodes for index cluster tree
+//			_sons[0] = new Cluster(this, _beg, isep1, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj0);
+//			_sons[1] = new Cluster(this, isep1, isep2, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj1);
+//			_sons[2] = new Separator(this, isep2, _end, _g_op_perm,	_g_po_perm, _g_adj_mat, l_adj2);
+//
+//			dynamic_cast<Cluster*>(_sons[0])->subdivide(bmin);
+//			dynamic_cast<Cluster*>(_sons[1])->subdivide(bmin);
+//
+//		} else {
+//			delete _l_adj_mat;
+//			_l_adj_mat = NULL;
+//		} // end if next recursion step
 	} // end if ( connected && size () > bmin )
 }
 
diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.cpp
index 215672da6d6f443b4f3717ea0065fadaef71ff64..23565205a7bc4b5aa0517c0d13952b7f7eb82a32 100644
--- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.cpp
+++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.cpp
@@ -14,7 +14,7 @@ namespace MathLib {
 ClusterBase::ClusterBase(unsigned n, unsigned const* const iA,
 		unsigned const*const jA) :
 	_beg(0), _end(n), _n_sons(0), _sons(NULL), _parent(NULL), _g_op_perm(NULL),
-	_g_po_perm(NULL), _l_adj_mat(NULL)
+	_g_po_perm(NULL), _g_adj_mat(NULL), _l_adj_mat(NULL)
 {
 	const unsigned nnz = iA[n];
 
@@ -28,6 +28,23 @@ ClusterBase::ClusterBase(unsigned n, unsigned const* const iA,
 
 	_l_adj_mat = new AdjMat(n, row_ptr, col_idx);
 	_l_adj_mat->makeSymmetric();
+
+	// make a copy of the local row_ptr array
+	unsigned const* l_row_ptr(_l_adj_mat->getRowPtrArray());
+	unsigned *g_row_ptr(new unsigned[n + 1]);
+	for (unsigned k = 0; k <= n; ++k)
+		g_row_ptr[k] = l_row_ptr[k];
+	// make a copy of the local col_idx array
+	unsigned const* l_col_idx(_l_adj_mat->getColIdxArray());
+	const unsigned g_nnz(g_row_ptr[n]);
+	unsigned *g_col_idx(new unsigned[g_nnz]);
+	for (unsigned k = 0; k < g_nnz; ++k)
+		g_col_idx[k] = l_col_idx[k];
+	// generate global matrix from local matrix
+	// (only in the root of cluster tree)
+	_g_adj_mat = new AdjMat(n, g_row_ptr, g_col_idx);
+
+
 }
 
 ClusterBase::ClusterBase(ClusterBase *father, unsigned beg, unsigned end,
diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.h
index 291df29ab15faa4e8cc9c843403d0589d2aa656b..1f682f6acd354548bb2381f3e6dfc6ebdaa03bea 100644
--- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.h
+++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.h
@@ -46,6 +46,10 @@ public:
 
 	virtual bool isSeparator() const = 0;
 
+#ifndef NDEBUG
+	AdjMat const* getGlobalAdjMat() const { return _g_adj_mat; }
+#endif
+
 protected:
 	/** \brief Method returns the pointer to the parent cluster.
 	 \returns parent cluster */
diff --git a/SimpleTests/MatrixTests/CMakeLists.txt b/SimpleTests/MatrixTests/CMakeLists.txt
index 50238c4a25c2bcd8a3082b9bdc6127ba148e0bca..8becff80a2b650f086b2d0284b83d1851116703b 100644
--- a/SimpleTests/MatrixTests/CMakeLists.txt
+++ b/SimpleTests/MatrixTests/CMakeLists.txt
@@ -6,6 +6,13 @@ IF ( CMAKE_USE_PTHREADS_INIT )
         MESSAGE (STATUS "pthread library found." )
 ENDIF (CMAKE_USE_PTHREADS_INIT )
 
+FIND_PACKAGE( Metis )
+IF (METIS_FOUND)
+	MESSAGE (STATUS "metis found.")
+ELSE(METIS_FOUND)
+	MESSAGE (STATUS "metis not found.")
+ENDIF(METIS_FOUND)
+
 INCLUDE_DIRECTORIES(
         .
 	../../Base/
@@ -26,6 +33,13 @@ ADD_EXECUTABLE( MatTestRemoveRowsCols
         ${HEADERS}
 )
 
+ADD_EXECUTABLE( MatVecMultPerm
+        MatVecMultPerm.cpp
+        ${SOURCES}
+        ${HEADERS}
+)
+
+
 IF (WIN32)
         TARGET_LINK_LIBRARIES(MatMult Winmm.lib)
 ENDIF (WIN32)
@@ -44,3 +58,8 @@ TARGET_LINK_LIBRARIES ( MatTestRemoveRowsCols
 	MathLib
 )
 
+TARGET_LINK_LIBRARIES ( MatVecMultPerm
+	Base
+	MathLib
+	${METIS_LIBRARIES}
+)
diff --git a/SimpleTests/MatrixTests/MatVecMultPerm.cpp b/SimpleTests/MatrixTests/MatVecMultPerm.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b41d0fb108c8d891673350ccef0d33d341342180
--- /dev/null
+++ b/SimpleTests/MatrixTests/MatVecMultPerm.cpp
@@ -0,0 +1,149 @@
+/*
+ * MatVecMultPerm.cpp
+ *
+ *  Created on: Jan 3, 2012
+ *      Author: TF
+ */
+
+#include <cstdlib>
+
+// BaseLib
+#include "RunTimeTimer.h"
+#include "CPUTimeTimer.h"
+
+// MathLib
+#include "sparse.h"
+
+#include "LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h"
+#include "LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h"
+#include "LinAlg/Sparse/NestedDissectionPermutation/Cluster.h"
+#include "LinAlg/Sparse/CRSMatrix.h"
+
+int main(int argc, char *argv[])
+{
+	if (argc < 4) {
+		std::cout << "Usage: " << argv[0] << " matrix number_of_multiplications resultfile" << std::endl;
+		return 1;
+	}
+
+	// read the number of multiplication to execute
+	unsigned n_mults (0);
+	n_mults = atoi (argv[2]);
+
+	std::string fname_mat (argv[1]);
+
+	bool verbose (true);
+
+	// *** reading matrix in crs format from file
+	std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary);
+	double *A(NULL);
+	unsigned *iA(NULL), *jA(NULL), n;
+	if (in) {
+		if (verbose) {
+			std::cout << "reading matrix from " << fname_mat << " ... " << std::flush;
+		}
+		RunTimeTimer timer;
+		timer.start();
+		CS_read(in, n, iA, jA, A);
+		timer.stop();
+		if (verbose) {
+			std::cout << "ok, " << timer.elapsed() << " s)" << std::endl;
+		}
+	} else {
+		std::cout << "error reading matrix from " << fname_mat << std::endl;
+	}
+	unsigned nnz(iA[n]);
+	if (verbose) {
+		std::cout << "Parameters read: n=" << n << ", nnz=" << nnz << std::endl;
+	}
+
+//	MathLib::CRSMatrix<double, unsigned> mat(n, iA, jA, A);
+	MathLib::CRSMatrixReordered mat(n, iA, jA, A);
+	std::cout << mat.getNRows() << " x " << mat.getNCols() << std::endl;
+
+	double *x(new double[n]);
+	double *y(new double[n]);
+
+	for (unsigned k(0); k<n; ++k)
+		x[k] = 1.0;
+
+	// create time measurement objects
+	RunTimeTimer run_timer;
+	CPUTimeTimer cpu_timer;
+
+	// calculate the nested dissection reordering
+	if (verbose) {
+		std::cout << "calculating nested dissection permutation of matrix ... " << std::flush;
+	}
+	run_timer.start();
+	cpu_timer.start();
+	MathLib::Cluster cluster_tree(n, iA, jA);
+	unsigned *op_perm(new unsigned[n]);
+	unsigned *po_perm(new unsigned[n]);
+	for (unsigned k(0); k<n; k++)
+		op_perm[k] = po_perm[k] = k;
+	cluster_tree.createClusterTree(op_perm, po_perm, 1000);
+	cpu_timer.stop();
+	run_timer.stop();
+	if (verbose)
+		std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl;
+
+	// applying the nested dissection reordering
+	if (verbose) {
+		std::cout << "applying nested dissection permutation to FEM matrix ... " << std::flush;
+	}
+	run_timer.start();
+	cpu_timer.start();
+	mat.reorderMatrix(op_perm, po_perm);
+	cpu_timer.stop();
+	run_timer.stop();
+	if (verbose) std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl;
+
+#ifndef NDEBUG
+//	MathLib::AdjMat *global_reordered_adj_mat((cluster_tree.getGlobalAdjMat())->getMat(0,n,op_perm, po_perm));
+//	const unsigned adj_nnz(global_reordered_adj_mat->getNNZ());
+//	double* adj_mat_data(new double[adj_nnz]);
+//	for (unsigned k(0); k<adj_nnz; k++) adj_mat_data[k] = 1.0;
+//	std::string fname_out (fname_mat);
+//	fname_out = fname_out.substr(0,fname_mat.length()-4);
+//	fname_out += "_adj.bin";
+//	std::ofstream os (fname_out.c_str(), std::ios::binary);
+//	CS_write(os, n, global_reordered_adj_mat->getRowPtrArray(), global_reordered_adj_mat->getColIdxArray(), adj_mat_data);
+	std::string fname_fem_out (fname_mat);
+	fname_fem_out = fname_fem_out.substr(0,fname_mat.length()-4);
+	fname_fem_out += "_fem_reordered.bin";
+	std::ofstream os (fname_fem_out.c_str(), std::ios::binary);
+	CS_write(os, n, mat.getRowPtrArray(), mat.getColIdxArray(), mat.getEntryArray());
+#endif
+
+	if (verbose) {
+		std::cout << "matrix vector multiplication with Toms amuxCRS ... " << std::flush;
+	}
+	run_timer.start();
+	cpu_timer.start();
+	for (size_t k(0); k<n_mults; k++) {
+		mat.amux (1.0, x, y);
+	}
+	cpu_timer.stop();
+	run_timer.stop();
+
+	if (verbose) {
+		std::cout << "done [" << cpu_timer.elapsed() << " sec cpu time], ["
+				<< run_timer.elapsed() << " sec run time]" << std::endl;
+	} else {
+		if (argc == 5) {
+			std::ofstream result_os (argv[4], std::ios::app);
+			if (result_os) {
+				result_os << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl;
+			}
+			result_os.close();
+		} else {
+			std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl;
+		}
+	}
+
+	delete [] x;
+	delete [] y;
+
+	return 0;
+}
diff --git a/cmake/FindMetis.cmake b/cmake/FindMetis.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..db54a0ec1cd9dc9779e15749244abcfe5a3e0611
--- /dev/null
+++ b/cmake/FindMetis.cmake
@@ -0,0 +1,28 @@
+#
+# Find the METIS includes and libraries
+#
+# METIS is an library that implements a variety of algorithms for
+# partitioning unstructured graphs, meshes, and for computing fill-reducing orderings of
+# sparse matrices. It can be found at:
+# http://glaros.dtc.umn.edu/gkhome/metis/metis/download
+#
+# METIS_INCLUDE_DIR - where to find autopack.h
+# METIS_LIBRARIES   - List of fully qualified libraries to link against.
+# METIS_FOUND       - Do not attempt to use if "no" or undefined.
+
+FIND_PATH(METIS_INCLUDE_DIR metis.h
+	/usr/include/metis
+	/home/fischeth/include/
+)
+
+FIND_LIBRARY(METIS_LIBRARY metis
+	/usr/lib
+	/home/fischeth/lib/
+)
+
+IF(METIS_INCLUDE_DIR)
+  IF(METIS_LIBRARY)
+    SET( METIS_LIBRARIES ${METIS_LIBRARY})
+    SET( METIS_FOUND "YES" )
+  ENDIF(METIS_LIBRARY)
+ENDIF(METIS_INCLUDE_DIR)