From 231758d26b450c56f566e3795a82051cba26b0bd Mon Sep 17 00:00:00 2001 From: Thomas Fischer <thomas.fischer@ufz.de> Date: Wed, 14 Dec 2011 15:35:34 +0100 Subject: [PATCH] put nearly the whole CG method working loop into #pragma omp parallel to avoid overhead for parallelization --- MathLib/LinAlg/Solvers/CGParallel.cpp | 65 +++++++++++++-------------- MathLib/MathTools.cpp | 20 ++++----- 2 files changed, 40 insertions(+), 45 deletions(-) diff --git a/MathLib/LinAlg/Solvers/CGParallel.cpp b/MathLib/LinAlg/Solvers/CGParallel.cpp index fa2b8153c07..8c74cb401b4 100644 --- a/MathLib/LinAlg/Solvers/CGParallel.cpp +++ b/MathLib/LinAlg/Solvers/CGParallel.cpp @@ -41,12 +41,8 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const double * __restrict__ rhat(new double[N]); double rho, rho1 = 0.0; -// p = new double[4* N]; -// q = p + N; -// r = q + N; -// rhat = r + N; - double nrmb = sqrt(scpr(b, b, N)); + if (nrmb < std::numeric_limits<double>::epsilon()) { blas::setzero(N, x); eps = 0.0; @@ -74,41 +70,42 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const #ifndef NDEBUG std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl; #endif - // r^ = C r - // rhat = r -// blas::copy(N, r, rhat); - #pragma omp parallel for - for (k = 0; k < N; k++) { - rhat[k] = r[k]; - } - mat->precondApply(rhat); - // rho = r * r^; - rho = scpr(r, rhat, N); - - if (l > 1) { - double beta = rho / rho1; - // p = r^ + beta * p + #pragma omp parallel + { + // r^ = C r + // rhat = r +// blas::copy(N, r, rhat); #pragma omp parallel for for (k = 0; k < N; k++) { - p[k] = rhat[k] + beta * p[k]; + rhat[k] = r[k]; } - } else { -// blas::copy(N, rhat, p); - #pragma omp parallel for - for (k = 0; k < N; k++) { - p[k] = rhat[k]; + mat->precondApply(rhat); + + // rho = r * r^; + rho = scpr(r, rhat, N); + + if (l > 1) { + double beta = rho / rho1; + // p = r^ + beta * p + #pragma omp parallel for + for (k = 0; k < N; k++) { + p[k] = rhat[k] + beta * p[k]; + } + } else { +// blas::copy(N, rhat, p); + #pragma omp parallel for + for (k = 0; k < N; k++) { + p[k] = rhat[k]; + } } - } - // q = Ap - mat->amux(D_ONE, p, q); + // q = Ap + mat->amux(D_ONE, p, q); - // alpha = rho / p*q - double alpha = rho / scpr(p, q, N); + // alpha = rho / p*q + double alpha = rho / scpr(p, q, N); - #pragma omp parallel - { // x += alpha * p #pragma omp for nowait for (k = 0; k < N; k++) { @@ -122,9 +119,9 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const } #pragma omp barrier - } - resid = sqrt(scpr(r, r, N)); + resid = sqrt(scpr(r, r, N)); + } // end #pragma omp parallel if (resid <= eps * nrmb) { eps = resid / nrmb; diff --git a/MathLib/MathTools.cpp b/MathLib/MathTools.cpp index 35bd2612dc1..8058f11cc64 100644 --- a/MathLib/MathTools.cpp +++ b/MathLib/MathTools.cpp @@ -12,17 +12,15 @@ namespace MathLib { #ifdef _OPENMP double scpr(double const * const v, double const * const w, unsigned n) { - long double res (v[0]*w[0]); - unsigned k; - #pragma omp parallel - { - #pragma omp parallel for reduction (+:res) - for (k = 1; k<n; k++) { - res += v[k] * w[k]; - } - } - - return res; + long double res (v[0]*w[0]); + unsigned k; + + #pragma omp parallel for reduction (+:res) + for (k = 1; k<n; k++) { + res += v[k] * w[k]; + } + + return res; } #endif -- GitLab