From 435c5305db0349f65e16a606f21f5c787f9ee474 Mon Sep 17 00:00:00 2001
From: Thomas Fischer <thomas.fischer@ufz.de>
Date: Tue, 13 Dec 2011 15:39:10 +0100
Subject: [PATCH] a try to improve the speedup of CG method

---
 MathLib/LinAlg/Solvers/CGParallel.cpp | 23 +++++++++++++++++++----
 MathLib/MathTools.cpp                 |  1 -
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/MathLib/LinAlg/Solvers/CGParallel.cpp b/MathLib/LinAlg/Solvers/CGParallel.cpp
index 77210867b02..acd28b0a5d2 100644
--- a/MathLib/LinAlg/Solvers/CGParallel.cpp
+++ b/MathLib/LinAlg/Solvers/CGParallel.cpp
@@ -65,18 +65,23 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const
 		return 0;
 	}
 
+	unsigned k;
 	for (unsigned l = 1; l <= nsteps; ++l) {
 #ifndef NDEBUG
 		std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl;
 #endif
 		// r^ = C r
-		blas::copy(N, r, rhat);
+		// rhat = r
+//		blas::copy(N, r, rhat);
+		#pragma omp parallel for
+		for (k = 0; k < N; k++) {
+			rhat[k] = r[k];
+		}
 		mat->precondApply(rhat);
 
 		// rho = r * r^;
 		rho = scpr(r, rhat, N, num_threads);
 
-		unsigned k;
 		if (l > 1) {
 			double beta = rho / rho1;
 			// p = r^ + beta * p
@@ -84,10 +89,20 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const
 			for (k = 0; k < N; k++) {
 				p[k] = rhat[k] + beta * p[k];
 			}
-		} else blas::copy(N, rhat, p);
+		} else {
+//			blas::copy(N, rhat, p);
+			#pragma omp parallel for
+			for (k = 0; k < N; k++) {
+				p[k] = rhat[k];
+			}
+		}
 
 		// q = Ap
-		blas::setzero(N, q);
+//		blas::setzero(N, q);
+		#pragma omp parallel for
+		for (k = 0; k < N; k++) {
+			q[k] = 0.0;
+		}
 		mat->amux(D_ONE, p, q);
 
 		// alpha = rho / p*q
diff --git a/MathLib/MathTools.cpp b/MathLib/MathTools.cpp
index 06b01cd6d0a..ccad1686733 100644
--- a/MathLib/MathTools.cpp
+++ b/MathLib/MathTools.cpp
@@ -15,7 +15,6 @@ double scpr(double const * const v, double const * const w, unsigned n,
 {
         long double res (0.0);
         unsigned k;
-        omp_set_num_threads (num_of_threads);
         #pragma omp parallel
         {
                 #pragma omp parallel for reduction (+:res)
-- 
GitLab