From e94d51150f7afd8aa9afe1207441c95fd96a9fd0 Mon Sep 17 00:00:00 2001 From: Thomas Fischer <thomas.fischer@ufz.de> Date: Fri, 20 Jan 2012 13:04:40 +0100 Subject: [PATCH] implemented new class CRSMatrixReorderedOpenMP which uses Nested Dissection reordering and OpenMP for matrix vector multiplication --- MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h | 13 +- .../CRSMatrixReorderedOpenMP.cpp | 29 ++++ .../CRSMatrixReorderedOpenMP.h | 27 ++++ SimpleTests/MatrixTests/CMakeLists.txt | 12 ++ SimpleTests/MatrixTests/MatMult.cpp | 2 +- .../MatrixTests/MatVecMultNDPermOpenMP.cpp | 152 ++++++++++++++++++ 6 files changed, 226 insertions(+), 9 deletions(-) create mode 100644 MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp create mode 100644 MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h create mode 100644 SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp diff --git a/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h index a5297fc2418..e60f4f43761 100644 --- a/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h +++ b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h @@ -18,16 +18,16 @@ namespace MathLib { template<typename FP_TYPE, typename IDX_TYPE> class CRSMatrixOpenMP : public CRSMatrix<FP_TYPE, IDX_TYPE> { public: - CRSMatrixOpenMP(std::string const &fname, unsigned num_of_threads) : - CRSMatrix<FP_TYPE, IDX_TYPE>(fname), _num_of_threads (num_of_threads) + CRSMatrixOpenMP(std::string const &fname) : + CRSMatrix<FP_TYPE, IDX_TYPE>(fname) {} - CRSMatrixOpenMP(unsigned n, IDX_TYPE *iA, IDX_TYPE *jA, FP_TYPE* A, unsigned num_of_threads) : - CRSMatrix<FP_TYPE, IDX_TYPE>(n, iA, jA, A), _num_of_threads (num_of_threads) + CRSMatrixOpenMP(unsigned n, IDX_TYPE *iA, IDX_TYPE *jA, FP_TYPE* A) : + CRSMatrix<FP_TYPE, IDX_TYPE>(n, iA, jA, A) {} CRSMatrixOpenMP(unsigned n1) : - CRSMatrix<FP_TYPE, IDX_TYPE>(n1), _num_of_threads (1) + CRSMatrix<FP_TYPE, IDX_TYPE>(n1) {} virtual ~CRSMatrixOpenMP() @@ -37,9 +37,6 @@ public: { amuxCRSParallelOpenMP(d, MatrixBase::_n_rows, CRSMatrix<FP_TYPE,IDX_TYPE>::_row_ptr, CRSMatrix<FP_TYPE,IDX_TYPE>::_col_idx, CRSMatrix<FP_TYPE,IDX_TYPE>::_data, x, y); } - -private: - unsigned _num_of_threads; }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp new file mode 100644 index 00000000000..f773702c42e --- /dev/null +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp @@ -0,0 +1,29 @@ +/* + * CRSMatrixReorderedOpenMP.cpp + * + * Created on: Jan 20, 2012 + * Author: TF + */ + +#ifdef _OPENMP +#include "LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h" +#include "LinAlg/Sparse/amuxCRS.h" + +namespace MathLib { + +CRSMatrixReorderedOpenMP::CRSMatrixReorderedOpenMP(unsigned n, unsigned *iA, unsigned *jA, double* A) : + CRSMatrixReordered(n, iA, jA, A) +{ +} + +CRSMatrixReorderedOpenMP::~CRSMatrixReorderedOpenMP() +{} + +void CRSMatrixReorderedOpenMP::amux(double d, double const * const x, double *y) const +{ + amuxCRSParallelOpenMP(d, MatrixBase::_n_rows, CRSMatrix<double,unsigned>::_row_ptr, CRSMatrix<double,unsigned>::_col_idx, CRSMatrix<double,unsigned>::_data, x, y); +} + +} // end namespace MathLib + +#endif // _OPENMP diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h new file mode 100644 index 00000000000..471a91980e8 --- /dev/null +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h @@ -0,0 +1,27 @@ +/* + * CRSMatrixReorderedOpenMP.h + * + * Created on: Jan 20, 2012 + * Author: TF + */ + +#ifndef CRSMATRIXREORDEREDOPENMP_H_ +#define CRSMATRIXREORDEREDOPENMP_H_ + +#ifdef _OPENMP +#include "LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h" + +namespace MathLib { + +class CRSMatrixReorderedOpenMP : public CRSMatrixReordered { +public: + CRSMatrixReorderedOpenMP(unsigned n, unsigned *iA, unsigned *jA, double* A); + virtual ~CRSMatrixReorderedOpenMP(); + + virtual void amux(double d, double const * const x, double *y) const; +}; + +} +#endif // _OPENMP + +#endif /* CRSMATRIXREORDEREDOPENMP_H_ */ diff --git a/SimpleTests/MatrixTests/CMakeLists.txt b/SimpleTests/MatrixTests/CMakeLists.txt index 693185ee6c2..0bbb06cdbf2 100644 --- a/SimpleTests/MatrixTests/CMakeLists.txt +++ b/SimpleTests/MatrixTests/CMakeLists.txt @@ -38,6 +38,18 @@ IF (METIS_FOUND) MathLib ${METIS_LIBRARIES} ) + + ADD_EXECUTABLE( MatVecMultNDPermOpenMP + MatVecMultNDPermOpenMP.cpp + ${SOURCES} + ${HEADERS} + ) + + TARGET_LINK_LIBRARIES ( MatVecMultNDPermOpenMP + Base + MathLib + ${METIS_LIBRARIES} + ) ENDIF(METIS_FOUND) diff --git a/SimpleTests/MatrixTests/MatMult.cpp b/SimpleTests/MatrixTests/MatMult.cpp index 4b7f5d4cff5..f1de76aa8af 100644 --- a/SimpleTests/MatrixTests/MatMult.cpp +++ b/SimpleTests/MatrixTests/MatMult.cpp @@ -58,7 +58,7 @@ int main(int argc, char *argv[]) #ifdef _OPENMP omp_set_num_threads(n_threads); - MathLib::CRSMatrixOpenMP<double, unsigned> mat (n, iA, jA, A, n_threads); + MathLib::CRSMatrixOpenMP<double, unsigned> mat (n, iA, jA, A); #else MathLib::CRSMatrix<double, unsigned> mat (n, iA, jA, A); #endif diff --git a/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp b/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp new file mode 100644 index 00000000000..1947d62bd2b --- /dev/null +++ b/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp @@ -0,0 +1,152 @@ +/* + * MatVecMultNDPermOpenMP.cpp + * + * Created on: Jan 20, 2012 + * Author: TF + */ + +#include <cstdlib> + +// BaseLib +#include "RunTimeTimer.h" +#include "CPUTimeTimer.h" + +// MathLib +#include "sparse.h" + +#include "LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h" +#include "LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h" +#include "LinAlg/Sparse/NestedDissectionPermutation/Cluster.h" + +int main(int argc, char *argv[]) +{ + if (argc < 4) { + std::cout << "Usage: " << argv[0] << " matrix number_of_multiplications resultfile" << std::endl; + return 1; + } + + // read the number of multiplication to execute + unsigned n_mults (0); + n_mults = atoi (argv[2]); + + std::string fname_mat (argv[1]); + + bool verbose (true); + + // *** reading matrix in crs format from file + std::ifstream in(fname_mat.c_str(), std::ios::in | std::ios::binary); + double *A(NULL); + unsigned *iA(NULL), *jA(NULL), n; + if (in) { + if (verbose) { + std::cout << "reading matrix from " << fname_mat << " ... " << std::flush; + } + RunTimeTimer timer; + timer.start(); + CS_read(in, n, iA, jA, A); + timer.stop(); + if (verbose) { + std::cout << "ok, [wclock: " << timer.elapsed() << " s]" << std::endl; + } + } else { + std::cout << "error reading matrix from " << fname_mat << std::endl; + } + unsigned nnz(iA[n]); + if (verbose) { + std::cout << "Parameters read: n=" << n << ", nnz=" << nnz << std::endl; + } + + MathLib::CRSMatrixReorderedOpenMP mat(n, iA, jA, A); + + double *x(new double[n]); + double *y(new double[n]); + + for (unsigned k(0); k<n; ++k) + x[k] = 1.0; + + // create time measurement objects + RunTimeTimer run_timer; + CPUTimeTimer cpu_timer; + + // calculate the nested dissection reordering + if (verbose) { + std::cout << "calculating nested dissection permutation of matrix ... " << std::flush; + } + run_timer.start(); + cpu_timer.start(); + MathLib::Cluster cluster_tree(n, iA, jA); + unsigned *op_perm(new unsigned[n]); + unsigned *po_perm(new unsigned[n]); + for (unsigned k(0); k<n; k++) + op_perm[k] = po_perm[k] = k; + cluster_tree.createClusterTree(op_perm, po_perm, 1000); + cpu_timer.stop(); + run_timer.stop(); + if (verbose) { + std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl; + } else { + if (argc == 4) { + std::ofstream result_os(argv[3], std::ios::app); + if (result_os) { + result_os << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << " calc nested dissection perm" << std::endl; + } + result_os.close(); + } else { + std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << " calc nested dissection perm" << std::endl; + } + } + + + // applying the nested dissection reordering + if (verbose) { + std::cout << "applying nested dissection permutation to FEM matrix ... " << std::flush; + } + run_timer.start(); + cpu_timer.start(); + mat.reorderMatrix(op_perm, po_perm); + cpu_timer.stop(); + run_timer.stop(); + if (verbose) std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl; + else { + if (argc == 4) { + std::ofstream result_os(argv[3], std::ios::app); + if (result_os) { + result_os << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << " applying nested dissection perm" << std::endl; + } + result_os.close(); + } else { + std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl; + } + } + + if (verbose) { + std::cout << "matrix vector multiplication with Toms amuxCRS ... " << std::flush; + } + run_timer.start(); + cpu_timer.start(); + for (size_t k(0); k<n_mults; k++) { + mat.amux (1.0, x, y); + } + cpu_timer.stop(); + run_timer.stop(); + + if (verbose) { + std::cout << "done [" << cpu_timer.elapsed() << " sec cpu time], [wclock: " + << run_timer.elapsed() << " sec]" << std::endl; + } else { + if (argc == 4) { + std::ofstream result_os (argv[3], std::ios::app); + if (result_os) { + result_os << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << " " << n_mults << " MatVecMults, matrix " << fname_mat << std::endl; + } + result_os.close(); + } else { + std::cout << cpu_timer.elapsed() << "\t" << run_timer.elapsed() << std::endl; + } + } + + delete [] x; + delete [] y; + + return 0; +} -- GitLab