From 435c5305db0349f65e16a606f21f5c787f9ee474 Mon Sep 17 00:00:00 2001 From: Thomas Fischer <thomas.fischer@ufz.de> Date: Tue, 13 Dec 2011 15:39:10 +0100 Subject: [PATCH] a try to improve the speedup of CG method --- MathLib/LinAlg/Solvers/CGParallel.cpp | 23 +++++++++++++++++++---- MathLib/MathTools.cpp | 1 - 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/MathLib/LinAlg/Solvers/CGParallel.cpp b/MathLib/LinAlg/Solvers/CGParallel.cpp index 77210867b02..acd28b0a5d2 100644 --- a/MathLib/LinAlg/Solvers/CGParallel.cpp +++ b/MathLib/LinAlg/Solvers/CGParallel.cpp @@ -65,18 +65,23 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const return 0; } + unsigned k; for (unsigned l = 1; l <= nsteps; ++l) { #ifndef NDEBUG std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl; #endif // r^ = C r - blas::copy(N, r, rhat); + // rhat = r +// blas::copy(N, r, rhat); + #pragma omp parallel for + for (k = 0; k < N; k++) { + rhat[k] = r[k]; + } mat->precondApply(rhat); // rho = r * r^; rho = scpr(r, rhat, N, num_threads); - unsigned k; if (l > 1) { double beta = rho / rho1; // p = r^ + beta * p @@ -84,10 +89,20 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const for (k = 0; k < N; k++) { p[k] = rhat[k] + beta * p[k]; } - } else blas::copy(N, rhat, p); + } else { +// blas::copy(N, rhat, p); + #pragma omp parallel for + for (k = 0; k < N; k++) { + p[k] = rhat[k]; + } + } // q = Ap - blas::setzero(N, q); +// blas::setzero(N, q); + #pragma omp parallel for + for (k = 0; k < N; k++) { + q[k] = 0.0; + } mat->amux(D_ONE, p, q); // alpha = rho / p*q diff --git a/MathLib/MathTools.cpp b/MathLib/MathTools.cpp index 06b01cd6d0a..ccad1686733 100644 --- a/MathLib/MathTools.cpp +++ b/MathLib/MathTools.cpp @@ -15,7 +15,6 @@ double scpr(double const * const v, double const * const w, unsigned n, { long double res (0.0); unsigned k; - omp_set_num_threads (num_of_threads); #pragma omp parallel { #pragma omp parallel for reduction (+:res) -- GitLab