From 307867e886f6b95b9473d5ba6598e486a93f1e99 Mon Sep 17 00:00:00 2001 From: Thomas Fischer <thomas.fischer@ufz.de> Date: Wed, 14 Dec 2011 12:54:10 +0100 Subject: [PATCH] try to improve speedup using other OpenMP pragmas, reducing the number of parameters of function scpr and omit a initialization of a vector --- MathLib/LinAlg/Solvers/CGParallel.cpp | 34 +++++++++++++-------------- MathLib/MathTools.cpp | 7 +++--- MathLib/MathTools.h | 3 +-- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/MathLib/LinAlg/Solvers/CGParallel.cpp b/MathLib/LinAlg/Solvers/CGParallel.cpp index 79f1cf26e71..fa2b8153c07 100644 --- a/MathLib/LinAlg/Solvers/CGParallel.cpp +++ b/MathLib/LinAlg/Solvers/CGParallel.cpp @@ -84,7 +84,7 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const mat->precondApply(rhat); // rho = r * r^; - rho = scpr(r, rhat, N, num_threads); + rho = scpr(r, rhat, N); if (l > 1) { double beta = rho / rho1; @@ -102,29 +102,29 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const } // q = Ap -// blas::setzero(N, q); - #pragma omp parallel for - for (k = 0; k < N; k++) { - q[k] = 0.0; - } mat->amux(D_ONE, p, q); // alpha = rho / p*q - double alpha = rho / scpr(p, q, N, num_threads); + double alpha = rho / scpr(p, q, N); - // x += alpha * p - #pragma omp parallel for - for (k = 0; k < N; k++) { - x[k] += alpha * p[k]; - } + #pragma omp parallel + { + // x += alpha * p + #pragma omp for nowait + for (k = 0; k < N; k++) { + x[k] += alpha * p[k]; + } - // r -= alpha * q - #pragma omp parallel for - for (k = 0; k < N; k++) { - r[k] -= alpha * q[k]; + // r -= alpha * q + #pragma omp for nowait + for (k = 0; k < N; k++) { + r[k] -= alpha * q[k]; + } + + #pragma omp barrier } - resid = sqrt(scpr(r, r, N, num_threads)); + resid = sqrt(scpr(r, r, N)); if (resid <= eps * nrmb) { eps = resid / nrmb; diff --git a/MathLib/MathTools.cpp b/MathLib/MathTools.cpp index ccad1686733..35bd2612dc1 100644 --- a/MathLib/MathTools.cpp +++ b/MathLib/MathTools.cpp @@ -10,15 +10,14 @@ namespace MathLib { #ifdef _OPENMP -double scpr(double const * const v, double const * const w, unsigned n, - unsigned num_of_threads) +double scpr(double const * const v, double const * const w, unsigned n) { - long double res (0.0); + long double res (v[0]*w[0]); unsigned k; #pragma omp parallel { #pragma omp parallel for reduction (+:res) - for (k = 0; k<n; k++) { + for (k = 1; k<n; k++) { res += v[k] * w[k]; } } diff --git a/MathLib/MathTools.h b/MathLib/MathTools.h index 1d88785af6d..4a9d2b804f9 100644 --- a/MathLib/MathTools.h +++ b/MathLib/MathTools.h @@ -35,8 +35,7 @@ double scpr(const T* v0, const T* v1, size_t n) } #ifdef _OPENMP -double scpr(double const * const v, double const * const w, unsigned n, - unsigned num_of_threads); +double scpr(double const * const v, double const * const w, unsigned n); #endif /** -- GitLab