diff --git a/MathLib/CMakeLists.txt b/MathLib/CMakeLists.txt index 7f64f5550870ba9983b0fd8260c67af090194ee2..9dfbbeaeb47d70763f13318d993b46f7e695a25e 100644 --- a/MathLib/CMakeLists.txt +++ b/MathLib/CMakeLists.txt @@ -38,6 +38,7 @@ SET( SOURCES LinAlg/Sparse/amuxCRS.cpp LinAlg/Solvers/BiCGStab.cpp LinAlg/Solvers/CG.cpp + LinAlg/Solvers/CGParallel.cpp LinAlg/Solvers/GMRes.cpp LinAlg/Solvers/GaussAlgorithm.cpp LinAlg/Solvers/TriangularSolve.cpp diff --git a/MathLib/LinAlg/Solvers/CG.cpp b/MathLib/LinAlg/Solvers/CG.cpp index c5438a2261eeb42c7e075b819542f7613f49b5ae..93da2db1a53dd7e483af5346f121c39eed639911 100644 --- a/MathLib/LinAlg/Solvers/CG.cpp +++ b/MathLib/LinAlg/Solvers/CG.cpp @@ -7,8 +7,6 @@ #include <limits> -#include <omp.h> - #include "MathTools.h" #include "blas.h" #include "../Sparse/CRSMatrix.h" @@ -30,7 +28,7 @@ namespace MathLib { unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b, - double* const x, double& eps, unsigned& nsteps, unsigned num_threads) + double* const x, double& eps, unsigned& nsteps) { unsigned N = mat->getNRows(); double *p, *q, *r, *rhat, rho, rho1 = 0.0; @@ -64,11 +62,9 @@ unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b, } for (unsigned l = 1; l <= nsteps; ++l) { - #ifndef NDEBUG std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl; #endif - // r^ = C r blas::copy(N, r, rhat); mat->precondApply(rhat); @@ -80,8 +76,6 @@ unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b, double beta = rho / rho1; // p = r^ + beta * p unsigned k; - omp_set_num_threads(num_threads); -//#pragma omp parallel for for (k = 0; k < N; k++) { p[k] = rhat[k] + beta * p[k]; } diff --git a/MathLib/LinAlg/Solvers/CG.h b/MathLib/LinAlg/Solvers/CG.h index 4b07debdf6a01ac5293e0e8ceb9d8810245bde76..60961d003b5adead7b4bbdd6f2e303cee365ad5c 100644 --- a/MathLib/LinAlg/Solvers/CG.h +++ b/MathLib/LinAlg/Solvers/CG.h @@ -14,7 +14,12 @@ namespace MathLib { template <typename PF_TYPE, typename IDX_TYPE> class CRSMatrix; unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b, + double* const x, double& eps, unsigned& nsteps); + +#ifdef _OPENMP +unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const b, double* const x, double& eps, unsigned& nsteps, unsigned num_threads = 1); +#endif } // end namespace MathLib diff --git a/MathLib/LinAlg/Solvers/CGParallel.cpp b/MathLib/LinAlg/Solvers/CGParallel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a30db79885c99707ddcdc041ffb5eeff1dac03f1 --- /dev/null +++ b/MathLib/LinAlg/Solvers/CGParallel.cpp @@ -0,0 +1,126 @@ +/* + * CGParallel.cpp + * + * Created on: Dec 2, 2011 + * Author: TF + */ + +#include <limits> + +#include <omp.h> + +#include "MathTools.h" +#include "blas.h" +#include "../Sparse/CRSMatrix.h" +#include "../Sparse/CRSMatrixDiagPrecond.h" + +// CG solves the symmetric positive definite linear +// system Ax=b using the Conjugate Gradient method. +// +// The return value indicates convergence within max_iter (input) +// iterations (0), or no convergence within max_iter iterations (1). +// +// Upon successful return, output arguments have the following values: +// +// x -- approximate solution to Ax = b +// nsteps -- the number of iterations performed before the +// tolerance was reached +// eps -- the residual after the final iteration + +namespace MathLib { + +#ifdef _OPENMP +unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const b, + double* const x, double& eps, unsigned& nsteps, unsigned num_threads) +{ + omp_set_num_threads(num_threads); + unsigned N = mat->getNRows(); + double *p, *q, *r, *rhat, rho, rho1 = 0.0; + + p = new double[4* N]; + q = p + N; + r = q + N; + rhat = r + N; + + double nrmb = sqrt(scpr(b, b, N)); + if (nrmb < std::numeric_limits<double>::epsilon()) { + blas::setzero(N, x); + eps = 0.0; + nsteps = 0; + delete[] p; + return 0; + } + + // r0 = b - Ax0 + mat->amux(D_MONE, x, r); + for (unsigned k(0); k < N; k++) { + r[k] = b[k] - r[k]; + } + + double resid = blas::nrm2(N, r); + if (resid <= eps * nrmb) { + eps = resid / nrmb; + nsteps = 0; + delete[] p; + return 0; + } + + for (unsigned l = 1; l <= nsteps; ++l) { +#ifndef NDEBUG + std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl; +#endif + // r^ = C r + blas::copy(N, r, rhat); + mat->precondApply(rhat); + + // rho = r * r^; + rho = scpr(r, rhat, N, num_threads); + + unsigned k; + if (l > 1) { + double beta = rho / rho1; + // p = r^ + beta * p + #pragma omp parallel for + for (k = 0; k < N; k++) { + p[k] = rhat[k] + beta * p[k]; + } + } else blas::copy(N, rhat, p); + + // q = Ap + blas::setzero(N, q); + mat->amux(D_ONE, p, q); + + // alpha = rho / p*q + double alpha = rho / scpr(p, q, N, num_threads); + + // x += alpha * p + #pragma omp parallel for + for (k = 0; k < N; k++) { + x[k] += alpha * p[k]; + } + + // r -= alpha * q + blas::axpy(N, -alpha, q, r); + #pragma omp parallel for + for (k = 0; k < N; k++) { + r[k] -= alpha * q[k]; + } + + resid = sqrt(scpr(r, r, N, num_threads)); + + if (resid <= eps * nrmb) { + eps = resid / nrmb; + nsteps = l; + delete[] p; + return 0; + } + + rho1 = rho; + } + eps = resid / nrmb; + delete[] p; + return 1; +} +#endif + +} // end of namespace MathLib diff --git a/MathLib/MathTools.cpp b/MathLib/MathTools.cpp index b332447976cceb1908d620716915b9ef66aeb8f4..06b01cd6d0a88e248b92f4d078f4d7a7f84c418b 100644 --- a/MathLib/MathTools.cpp +++ b/MathLib/MathTools.cpp @@ -9,6 +9,26 @@ namespace MathLib { +#ifdef _OPENMP +double scpr(double const * const v, double const * const w, unsigned n, + unsigned num_of_threads) +{ + long double res (0.0); + unsigned k; + omp_set_num_threads (num_of_threads); + #pragma omp parallel + { + #pragma omp parallel for reduction (+:res) + for (k = 0; k<n; k++) { + res += v[k] * w[k]; + } + } + + return res; +} +#endif + + void crossProd(const double u[3], const double v[3], double r[3]) { r[0] = u[1] * v[2] - u[2] * v[1]; diff --git a/MathLib/MathTools.h b/MathLib/MathTools.h index 71746cbafd1a7f15b7422c8da2f29ec8f6fbec7c..1d88785af6d6bbe991890b0e1349f0759d996870 100644 --- a/MathLib/MathTools.h +++ b/MathLib/MathTools.h @@ -2,7 +2,7 @@ * \file MathTools.h * * Created on: Jan 13, 2010 - * Author: fischeth + * Author: TF */ #ifndef MATHTOOLS_H_ @@ -11,8 +11,12 @@ #include <vector> #include <cmath> #include <limits> + +#include <omp.h> + #include "Point.h" + namespace MathLib { /** @@ -22,13 +26,19 @@ namespace MathLib { * \param n the size of the array * */ template<class T> inline -double scpr(const T* v0, const T* v1, size_t n) { +double scpr(const T* v0, const T* v1, size_t n) +{ long double res(0.0); for (size_t k(0); k<n; k++) res += v0[k] * v1[k]; return (double) res; } +#ifdef _OPENMP +double scpr(double const * const v, double const * const w, unsigned n, + unsigned num_of_threads); +#endif + /** * computes the cross (or vector) product of the 3d vectors u and v * the result is given in the vector r diff --git a/SimpleTests/SolverTests/ConjugateGradientDiagonalPreconditioned.cpp b/SimpleTests/SolverTests/ConjugateGradientDiagonalPreconditioned.cpp index 043afa1fce8f63f801f4860ed1d06bb5ec1d8204..44855d1109847006bbc95b2991fe8a9f90be30ae 100644 --- a/SimpleTests/SolverTests/ConjugateGradientDiagonalPreconditioned.cpp +++ b/SimpleTests/SolverTests/ConjugateGradientDiagonalPreconditioned.cpp @@ -61,7 +61,11 @@ int main(int argc, char *argv[]) run_timer.start(); cpu_timer.start(); - MathLib::CG (mat, b, x, eps, steps, num_omp_threads); + if (num_omp_threads == 1) { + MathLib::CG(mat, b, x, eps, steps); + } else { + MathLib::CGParallel(mat, b, x, eps, steps, num_omp_threads); + } cpu_timer.stop(); run_timer.stop(); diff --git a/SimpleTests/SolverTests/ConjugateGradientUnpreconditioned.cpp b/SimpleTests/SolverTests/ConjugateGradientUnpreconditioned.cpp index 258fde9616bbc4e15ad553c626bdd86a496f48f3..1f0c4cb92b1fc5ea10b796b4d998ca64e60ac861 100644 --- a/SimpleTests/SolverTests/ConjugateGradientUnpreconditioned.cpp +++ b/SimpleTests/SolverTests/ConjugateGradientUnpreconditioned.cpp @@ -55,7 +55,7 @@ int main(int argc, char *argv[]) // double cg_time (cputime(0.0)); double eps (1.0e-6); unsigned steps (4000); - CG (mat, b, x, eps, steps, 1); + CG (mat, b, x, eps, steps); // cg_time = cputime(cg_time); time(&end_time); std::cout << " in " << steps << " iterations (residuum is " << eps << ") took " << /*cg_time <<*/ " sec time and " << (end_time-start_time) << " sec" << std::endl;