Skip to content
Snippets Groups Projects
Commit e6f2e40e authored by Tom Fischer's avatar Tom Fischer
Browse files

first version of parallelized CG method

parent 9d64276f
No related branches found
No related tags found
No related merge requests found
......@@ -38,6 +38,7 @@ SET( SOURCES
LinAlg/Sparse/amuxCRS.cpp
LinAlg/Solvers/BiCGStab.cpp
LinAlg/Solvers/CG.cpp
LinAlg/Solvers/CGParallel.cpp
LinAlg/Solvers/GMRes.cpp
LinAlg/Solvers/GaussAlgorithm.cpp
LinAlg/Solvers/TriangularSolve.cpp
......
......@@ -7,8 +7,6 @@
#include <limits>
#include <omp.h>
#include "MathTools.h"
#include "blas.h"
#include "../Sparse/CRSMatrix.h"
......@@ -30,7 +28,7 @@
namespace MathLib {
unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b,
double* const x, double& eps, unsigned& nsteps, unsigned num_threads)
double* const x, double& eps, unsigned& nsteps)
{
unsigned N = mat->getNRows();
double *p, *q, *r, *rhat, rho, rho1 = 0.0;
......@@ -64,11 +62,9 @@ unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b,
}
for (unsigned l = 1; l <= nsteps; ++l) {
#ifndef NDEBUG
std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl;
#endif
// r^ = C r
blas::copy(N, r, rhat);
mat->precondApply(rhat);
......@@ -80,8 +76,6 @@ unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b,
double beta = rho / rho1;
// p = r^ + beta * p
unsigned k;
omp_set_num_threads(num_threads);
//#pragma omp parallel for
for (k = 0; k < N; k++) {
p[k] = rhat[k] + beta * p[k];
}
......
......@@ -14,7 +14,12 @@ namespace MathLib {
template <typename PF_TYPE, typename IDX_TYPE> class CRSMatrix;
unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b,
double* const x, double& eps, unsigned& nsteps);
#ifdef _OPENMP
unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const b,
double* const x, double& eps, unsigned& nsteps, unsigned num_threads = 1);
#endif
} // end namespace MathLib
......
/*
* CGParallel.cpp
*
* Created on: Dec 2, 2011
* Author: TF
*/
#include <limits>
#include <omp.h>
#include "MathTools.h"
#include "blas.h"
#include "../Sparse/CRSMatrix.h"
#include "../Sparse/CRSMatrixDiagPrecond.h"
// CG solves the symmetric positive definite linear
// system Ax=b using the Conjugate Gradient method.
//
// The return value indicates convergence within max_iter (input)
// iterations (0), or no convergence within max_iter iterations (1).
//
// Upon successful return, output arguments have the following values:
//
// x -- approximate solution to Ax = b
// nsteps -- the number of iterations performed before the
// tolerance was reached
// eps -- the residual after the final iteration
namespace MathLib {
#ifdef _OPENMP
unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const b,
double* const x, double& eps, unsigned& nsteps, unsigned num_threads)
{
omp_set_num_threads(num_threads);
unsigned N = mat->getNRows();
double *p, *q, *r, *rhat, rho, rho1 = 0.0;
p = new double[4* N];
q = p + N;
r = q + N;
rhat = r + N;
double nrmb = sqrt(scpr(b, b, N));
if (nrmb < std::numeric_limits<double>::epsilon()) {
blas::setzero(N, x);
eps = 0.0;
nsteps = 0;
delete[] p;
return 0;
}
// r0 = b - Ax0
mat->amux(D_MONE, x, r);
for (unsigned k(0); k < N; k++) {
r[k] = b[k] - r[k];
}
double resid = blas::nrm2(N, r);
if (resid <= eps * nrmb) {
eps = resid / nrmb;
nsteps = 0;
delete[] p;
return 0;
}
for (unsigned l = 1; l <= nsteps; ++l) {
#ifndef NDEBUG
std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl;
#endif
// r^ = C r
blas::copy(N, r, rhat);
mat->precondApply(rhat);
// rho = r * r^;
rho = scpr(r, rhat, N, num_threads);
unsigned k;
if (l > 1) {
double beta = rho / rho1;
// p = r^ + beta * p
#pragma omp parallel for
for (k = 0; k < N; k++) {
p[k] = rhat[k] + beta * p[k];
}
} else blas::copy(N, rhat, p);
// q = Ap
blas::setzero(N, q);
mat->amux(D_ONE, p, q);
// alpha = rho / p*q
double alpha = rho / scpr(p, q, N, num_threads);
// x += alpha * p
#pragma omp parallel for
for (k = 0; k < N; k++) {
x[k] += alpha * p[k];
}
// r -= alpha * q
blas::axpy(N, -alpha, q, r);
#pragma omp parallel for
for (k = 0; k < N; k++) {
r[k] -= alpha * q[k];
}
resid = sqrt(scpr(r, r, N, num_threads));
if (resid <= eps * nrmb) {
eps = resid / nrmb;
nsteps = l;
delete[] p;
return 0;
}
rho1 = rho;
}
eps = resid / nrmb;
delete[] p;
return 1;
}
#endif
} // end of namespace MathLib
......@@ -9,6 +9,26 @@
namespace MathLib {
#ifdef _OPENMP
double scpr(double const * const v, double const * const w, unsigned n,
unsigned num_of_threads)
{
long double res (0.0);
unsigned k;
omp_set_num_threads (num_of_threads);
#pragma omp parallel
{
#pragma omp parallel for reduction (+:res)
for (k = 0; k<n; k++) {
res += v[k] * w[k];
}
}
return res;
}
#endif
void crossProd(const double u[3], const double v[3], double r[3])
{
r[0] = u[1] * v[2] - u[2] * v[1];
......
......@@ -2,7 +2,7 @@
* \file MathTools.h
*
* Created on: Jan 13, 2010
* Author: fischeth
* Author: TF
*/
#ifndef MATHTOOLS_H_
......@@ -11,8 +11,12 @@
#include <vector>
#include <cmath>
#include <limits>
#include <omp.h>
#include "Point.h"
namespace MathLib {
/**
......@@ -22,13 +26,19 @@ namespace MathLib {
* \param n the size of the array
* */
template<class T> inline
double scpr(const T* v0, const T* v1, size_t n) {
double scpr(const T* v0, const T* v1, size_t n)
{
long double res(0.0);
for (size_t k(0); k<n; k++)
res += v0[k] * v1[k];
return (double) res;
}
#ifdef _OPENMP
double scpr(double const * const v, double const * const w, unsigned n,
unsigned num_of_threads);
#endif
/**
* computes the cross (or vector) product of the 3d vectors u and v
* the result is given in the vector r
......
......@@ -61,7 +61,11 @@ int main(int argc, char *argv[])
run_timer.start();
cpu_timer.start();
MathLib::CG (mat, b, x, eps, steps, num_omp_threads);
if (num_omp_threads == 1) {
MathLib::CG(mat, b, x, eps, steps);
} else {
MathLib::CGParallel(mat, b, x, eps, steps, num_omp_threads);
}
cpu_timer.stop();
run_timer.stop();
......
......@@ -55,7 +55,7 @@ int main(int argc, char *argv[])
// double cg_time (cputime(0.0));
double eps (1.0e-6);
unsigned steps (4000);
CG (mat, b, x, eps, steps, 1);
CG (mat, b, x, eps, steps);
// cg_time = cputime(cg_time);
time(&end_time);
std::cout << " in " << steps << " iterations (residuum is " << eps << ") took " << /*cg_time <<*/ " sec time and " << (end_time-start_time) << " sec" << std::endl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment