From 307867e886f6b95b9473d5ba6598e486a93f1e99 Mon Sep 17 00:00:00 2001
From: Thomas Fischer <thomas.fischer@ufz.de>
Date: Wed, 14 Dec 2011 12:54:10 +0100
Subject: [PATCH] try to improve speedup using other OpenMP pragmas, reducing
 the number of parameters of function scpr and omit a initialization of a
 vector

---
 MathLib/LinAlg/Solvers/CGParallel.cpp | 34 +++++++++++++--------------
 MathLib/MathTools.cpp                 |  7 +++---
 MathLib/MathTools.h                   |  3 +--
 3 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/MathLib/LinAlg/Solvers/CGParallel.cpp b/MathLib/LinAlg/Solvers/CGParallel.cpp
index 79f1cf26e71..fa2b8153c07 100644
--- a/MathLib/LinAlg/Solvers/CGParallel.cpp
+++ b/MathLib/LinAlg/Solvers/CGParallel.cpp
@@ -84,7 +84,7 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const
 		mat->precondApply(rhat);
 
 		// rho = r * r^;
-		rho = scpr(r, rhat, N, num_threads);
+		rho = scpr(r, rhat, N);
 
 		if (l > 1) {
 			double beta = rho / rho1;
@@ -102,29 +102,29 @@ unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const
 		}
 
 		// q = Ap
-//		blas::setzero(N, q);
-		#pragma omp parallel for
-		for (k = 0; k < N; k++) {
-			q[k] = 0.0;
-		}
 		mat->amux(D_ONE, p, q);
 
 		// alpha = rho / p*q
-		double alpha = rho / scpr(p, q, N, num_threads);
+		double alpha = rho / scpr(p, q, N);
 
-		// x += alpha * p
-		#pragma omp parallel for
-		for (k = 0; k < N; k++) {
-			x[k] += alpha * p[k];
-		}
+		#pragma omp parallel
+		{
+			// x += alpha * p
+			#pragma omp for nowait
+			for (k = 0; k < N; k++) {
+				x[k] += alpha * p[k];
+			}
 
-		// r -= alpha * q
-		#pragma omp parallel for
-		for (k = 0; k < N; k++) {
-			r[k] -= alpha * q[k];
+			// r -= alpha * q
+			#pragma omp for nowait
+			for (k = 0; k < N; k++) {
+				r[k] -= alpha * q[k];
+			}
+
+			#pragma omp barrier
 		}
 
-		resid = sqrt(scpr(r, r, N, num_threads));
+		resid = sqrt(scpr(r, r, N));
 
 		if (resid <= eps * nrmb) {
 			eps = resid / nrmb;
diff --git a/MathLib/MathTools.cpp b/MathLib/MathTools.cpp
index ccad1686733..35bd2612dc1 100644
--- a/MathLib/MathTools.cpp
+++ b/MathLib/MathTools.cpp
@@ -10,15 +10,14 @@
 namespace MathLib {
 
 #ifdef _OPENMP
-double scpr(double const * const v, double const * const w, unsigned n,
-        unsigned num_of_threads)
+double scpr(double const * const v, double const * const w, unsigned n)
 {
-        long double res (0.0);
+        long double res (v[0]*w[0]);
         unsigned k;
         #pragma omp parallel
         {
                 #pragma omp parallel for reduction (+:res)
-                for (k = 0; k<n; k++) {
+                for (k = 1; k<n; k++) {
                         res += v[k] * w[k];
                 }
         }
diff --git a/MathLib/MathTools.h b/MathLib/MathTools.h
index 1d88785af6d..4a9d2b804f9 100644
--- a/MathLib/MathTools.h
+++ b/MathLib/MathTools.h
@@ -35,8 +35,7 @@ double scpr(const T* v0, const T* v1, size_t n)
 }
 
 #ifdef _OPENMP
-double scpr(double const * const v, double const * const w, unsigned n,
-        unsigned num_of_threads);
+double scpr(double const * const v, double const * const w, unsigned n);
 #endif
 
 /**
-- 
GitLab