From b0a965eade9fba8f89e742ce360dd3261239380c Mon Sep 17 00:00:00 2001 From: Thomas Fischer <thomas.fischer@ufz.de> Date: Tue, 27 Sep 2011 16:46:00 +0200 Subject: [PATCH] - intergration of my CG project into OGS-6 - applying changes to interfaces --- CMakeLists.txt | 1 + MathLib/CMakeLists.txt | 15 +- MathLib/LinAlg/MatrixBase.h | 66 + .../Preconditioner/generateDiagPrecond.cpp | 29 + MathLib/LinAlg/Solvers/CG.cpp | 125 ++ MathLib/LinAlg/Solvers/CG.h | 130 ++ MathLib/LinAlg/Solvers/blas.h | 1696 +++++++++++++++++ MathLib/LinAlg/Solvers/solver.h | 18 + MathLib/{ => LinAlg/Sparse}/CRSMatrix.h | 13 +- .../Sparse}/CRSMatrixDiagPrecond.h | 0 MathLib/{ => LinAlg/Sparse}/CRSMatrixOpenMP.h | 6 +- .../{ => LinAlg/Sparse}/CRSMatrixPThreads.h | 4 + MathLib/{ => LinAlg/Sparse}/CRSSymMatrix.h | 0 MathLib/LinAlg/Sparse/CRSTranspose.h | 44 + MathLib/LinAlg/Sparse/SparseMatrixBase.h | 19 + MathLib/{ => LinAlg/Sparse}/amuxCRS.cpp | 15 +- MathLib/{ => LinAlg/Sparse}/amuxCRS.h | 0 MathLib/MathTools.cpp | 73 + MathLib/MathTools.h | 105 + MathLib/SparseMatrixBase.h | 21 - MathLib/sparse.h | 34 +- SimpleTests/MatrixTests/.MatMult.cpp.swp | Bin 12288 -> 0 bytes SimpleTests/MatrixTests/MatMult.cpp | 9 +- SimpleTests/SolverTests/CMakeLists.txt | 44 + ...onjugateGradientDiagonalPreconditioned.cpp | 75 + .../ConjugateGradientUnpreconditioned.cpp | 66 + 26 files changed, 2537 insertions(+), 71 deletions(-) create mode 100644 MathLib/LinAlg/MatrixBase.h create mode 100644 MathLib/LinAlg/Preconditioner/generateDiagPrecond.cpp create mode 100644 MathLib/LinAlg/Solvers/CG.cpp create mode 100644 MathLib/LinAlg/Solvers/CG.h create mode 100644 MathLib/LinAlg/Solvers/blas.h create mode 100644 MathLib/LinAlg/Solvers/solver.h rename MathLib/{ => LinAlg/Sparse}/CRSMatrix.h (85%) rename MathLib/{ => LinAlg/Sparse}/CRSMatrixDiagPrecond.h (100%) rename MathLib/{ => LinAlg/Sparse}/CRSMatrixOpenMP.h (80%) rename MathLib/{ => LinAlg/Sparse}/CRSMatrixPThreads.h (95%) rename MathLib/{ => LinAlg/Sparse}/CRSSymMatrix.h (100%) create mode 100644 MathLib/LinAlg/Sparse/CRSTranspose.h create mode 100644 MathLib/LinAlg/Sparse/SparseMatrixBase.h rename MathLib/{ => LinAlg/Sparse}/amuxCRS.cpp (95%) rename MathLib/{ => LinAlg/Sparse}/amuxCRS.h (100%) create mode 100644 MathLib/MathTools.cpp create mode 100644 MathLib/MathTools.h delete mode 100644 MathLib/SparseMatrixBase.h delete mode 100644 SimpleTests/MatrixTests/.MatMult.cpp.swp create mode 100644 SimpleTests/SolverTests/CMakeLists.txt create mode 100644 SimpleTests/SolverTests/ConjugateGradientDiagonalPreconditioned.cpp create mode 100644 SimpleTests/SolverTests/ConjugateGradientUnpreconditioned.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 56d6bf2f70a..e1fe3728fbd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,5 +29,6 @@ SET( LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib ) ADD_SUBDIRECTORY( Base ) ADD_SUBDIRECTORY( MathLib ) ADD_SUBDIRECTORY( SimpleTests/MatrixTests ) +ADD_SUBDIRECTORY( SimpleTests/SolverTests ) diff --git a/MathLib/CMakeLists.txt b/MathLib/CMakeLists.txt index 07597457dc7..fc1bdefbc3e 100644 --- a/MathLib/CMakeLists.txt +++ b/MathLib/CMakeLists.txt @@ -1,16 +1,17 @@ # Source files SET( HEADERS - amuxCRS.h - CRSMatrix.h - CRSMatrixPThreads.h - CRSMatrixOpenMP.h - CRSSymMatrix.h + LinAlg/MatrixBase.h + LinAlg/Sparse/amuxCRS.h + LinAlg/Sparse/CRSMatrix.h + LinAlg/Sparse/CRSMatrixPThreads.h + LinAlg/Sparse/CRSMatrixOpenMP.h + LinAlg/Sparse/CRSSymMatrix.h sparse.h - SparseMatrixBase.h + LinAlg/Sparse/SparseMatrixBase.h ) SET( SOURCES - amuxCRS.cpp + LinAlg/Sparse/amuxCRS.cpp ) INCLUDE_DIRECTORIES ( diff --git a/MathLib/LinAlg/MatrixBase.h b/MathLib/LinAlg/MatrixBase.h new file mode 100644 index 00000000000..77877db23e9 --- /dev/null +++ b/MathLib/LinAlg/MatrixBase.h @@ -0,0 +1,66 @@ +/* + * MatrixBase.h + * + * Created on: Sep 27, 2011 + * Author: TF + */ + +#ifndef MATRIXBASE_H_ +#define MATRIXBASE_H_ + +namespace MathLib { + +/** + * class MatrixBase is the basis for all matrix classes (dense and sparse) + */ +class MatrixBase { +public: + /** + * Constructor for initialization of the number of rows and columns + * @param nrows number of rows + * @param ncols number of columns + * @return + */ + MatrixBase(unsigned nrows=0, unsigned ncols=0) : + _n_rows(nrows), _n_cols(ncols) + {} + + /** + * copy constructor. + * @param original the object that is copied + * @return + */ + MatrixBase (MatrixBase const& original) : + _n_rows (original._n_rows), _n_cols (original._n_cols) + {} + + /** + * destructor of the class. + * @return + */ + virtual ~MatrixBase() {}; + /** + * get the number of rows + * @return the number of rows + */ + unsigned getNRows () const { return _n_rows; } + /** + * get the number of columns + * @return the number of columns + */ + unsigned getNCols () const { return _n_cols; } + +protected: + /** + * the number of rows + */ + unsigned _n_rows; + /** + * the number of columns + */ + unsigned _n_cols; +}; + +} + +#endif /* MATRIXBASE_H_ */ diff --git a/MathLib/LinAlg/Preconditioner/generateDiagPrecond.cpp b/MathLib/LinAlg/Preconditioner/generateDiagPrecond.cpp new file mode 100644 index 00000000000..4b52b83107b --- /dev/null +++ b/MathLib/LinAlg/Preconditioner/generateDiagPrecond.cpp @@ -0,0 +1,29 @@ +#include "sparse.h" + +bool generateDiagPrecond (unsigned n, double* A, unsigned* jA, unsigned* iA, + double* diag) +{ + unsigned idx; // first idx of next row + unsigned c; // column + unsigned j; + bool has_no_diag; + + for (unsigned r(0); r<n; ++r) { + idx=iA[r+1]; + has_no_diag=true; + for (j=iA[r]; j<idx && has_no_diag; ++j) { + c=jA[j]; + if (c==r) { + has_no_diag=false; + diag[r] = 1.0/A[j]; + } + } + if (j==idx && has_no_diag) { + std::cout << "row " << r << " has no diagonal element " << std::endl; + return false; + } + } + return true; +} + + diff --git a/MathLib/LinAlg/Solvers/CG.cpp b/MathLib/LinAlg/Solvers/CG.cpp new file mode 100644 index 00000000000..50cab766b9b --- /dev/null +++ b/MathLib/LinAlg/Solvers/CG.cpp @@ -0,0 +1,125 @@ +/* + * CG.cpp + * + * Created on: Sep 27, 2011 + * Author: TF + */ + +#include <limits> + +#include "MathTools.h" +#include "blas.h" +#include "../Sparse/CRSMatrix.h" +#include "../Sparse/CRSMatrixDiagPrecond.h" + +#ifndef NDEBUG +#include <iostream> +#endif + +// CG solves the symmetric positive definite linear +// system Ax=b using the Conjugate Gradient method. +// +// The return value indicates convergence within max_iter (input) +// iterations (0), or no convergence within max_iter iterations (1). +// +// Upon successful return, output arguments have the following values: +// +// x -- approximate solution to Ax = b +// nsteps -- the number of iterations performed before the +// tolerance was reached +// eps -- the residual after the final iteration + +namespace MathLib { + +unsigned CG(CRSMatrix<double> const * mat, double const * const b, + double* const x, double& eps, unsigned& nsteps, unsigned num_threads) +{ + unsigned N = mat->getNRows(); + double *p, *q, *r, *rhat, rho, rho1 = 0.0; + + p = new double[4* N ]; + q = p + N; + r = q + N; + rhat = r + N; + + double nrmb = sqrt(scpr(N, b, b, num_threads)); + if (nrmb < std::numeric_limits<double>::epsilon()) { + blas::setzero(N, x); + eps = 0.0; + nsteps = 0; + delete[] p; + return 0; + } + + // r0 = b - Ax0 + mat->amux(D_MONE, x, r); + for (unsigned k(0); k < N; k++) { + r[k] = b[k] - r[k]; + } + + double resid = blas::nrm2(N, r); + out << "0\t" << resid / nrmb << std::endl; + if (resid <= eps * nrmb) { + eps = resid / nrmb; + nsteps = 0; + delete[] p; + out.close(); + return 0; + } + + for (unsigned l = 1; l <= nsteps; ++l) { + + out << l << "\t" << resid / nrmb << std::endl; +#ifndef NDEBUG + std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl; +#endif + + // r^ = C r + blas::copy(N, r, rhat); + mat->precondApply(rhat); + + // rho = r * r^; + rho = scpr(r, rhat, N); // num_threads); + + if (l > 1) { + double beta = rho / rho1; + // p = r^ + beta * p + unsigned k; + omp_set_num_threads(num_threads); +#pragma omp parallel for + for (k = 0; k < N; k++) { + p[k] = rhat[k] + beta * p[k]; + } + } else blas::copy(N, rhat, p); + + // q = Ap + blas::setzero(N, q); + mat->amux(D_ONE, p, q); + + // alpha = rho / p*q + double alpha = rho / scpr(N, p, q); + + // x += alpha * p + blas::axpy(N, alpha, p, x); + + // r -= alpha * q + blas::axpy(N, -alpha, q, r); + + resid = sqrt(scpr(N, r, r)); + + if (resid <= eps * nrmb) { + eps = resid / nrmb; + nsteps = l; + delete[] p; + return 0; + } + + rho1 = rho; + } + out.close(); + eps = resid / nrmb; + delete[] p; + return 1; +} + +} // end namespace MathLib diff --git a/MathLib/LinAlg/Solvers/CG.h b/MathLib/LinAlg/Solvers/CG.h new file mode 100644 index 00000000000..5cf6dbf38d1 --- /dev/null +++ b/MathLib/LinAlg/Solvers/CG.h @@ -0,0 +1,130 @@ +/* + * CG.h + * + * Created on: Sep 27, 2011 + * Author: TF + */ + +#include "blas.h" +#include "CG.h" +#include <limits> +#include "CRSMatrix.h" +#include "CRSMatrixDiagPrecond.h" +#include "sparse.h" + +#include <fstream> + +#ifndef NDEBUG +#include <iostream> +#endif + +// CG solves the symmetric positive definite linear +// system Ax=b using the Conjugate Gradient method. +// +// The return value indicates convergence within max_iter (input) +// iterations (0), or no convergence within max_iter iterations (1). +// +// Upon successful return, output arguments have the following values: +// +// x -- approximate solution to Ax = b +// nsteps -- the number of iterations performed before the +// tolerance was reached +// eps -- the residual after the final iteration + + +unsigned CG(CRSMatrix<double> const * mat, double const * const b, + double* const x, double& eps, unsigned& nsteps, unsigned num_threads) +{ + unsigned N = mat->getNRows(); + double *p, *q, *r, *rhat, rho, rho1 = 0.0; + + p = new double[4*N]; + q = p + N; + r = q + N; + rhat = r + N; + + double nrmb = sqrt(scpr(N, b, b, num_threads)); + if (nrmb < std::numeric_limits<double>::epsilon()) { + blas::setzero(N, x); + eps = 0.0; + nsteps = 0; + delete [] p; + return 0; + } + + // r0 = b - Ax0 + mat->amux (D_MONE, x, r); + for (unsigned k(0); k<N; k++) { + r[k] = b[k] - r[k]; + } + + std::ofstream out ("residuen.txt"); + + double resid = blas::nrm2(N, r); + out << "0\t" << resid/nrmb << std::endl; + if (resid <= eps*nrmb) { + eps = resid / nrmb; + nsteps = 0; + delete [] p; + out.close(); + return 0; + } + + for (unsigned l=1; l<=nsteps; ++l) { + + out << l << "\t" << resid/nrmb << std::endl; +#ifndef NDEBUG + std::cout << "Step " << l << ", resid=" << resid/nrmb << std::endl; +#endif + + // r^ = C r + blas::copy(N, r, rhat); + mat->precondApply(rhat); + + // rho = r * r^; + rho = scpr(N, r, rhat, num_threads); + + if (l>1) { + double beta = rho / rho1; + // p = r^ + beta * p + unsigned k; + omp_set_num_threads (num_threads); + #pragma omp parallel for + for(k=0; k<N; k++) { + p[k] = rhat[k] + beta * p[k]; + } + } + else + blas::copy(N, rhat, p); + + // q = Ap + blas::setzero(N, q); + mat->amux (D_ONE, p, q); + + // alpha = rho / p*q + double alpha = rho / scpr(N, p, q); + + // x += alpha * p + blas::axpy(N, alpha, p, x); + + // r -= alpha * q + blas::axpy(N, -alpha, q, r); + + resid = sqrt(scpr(N,r,r)); + + if (resid<=eps*nrmb) { + eps = resid/nrmb; + nsteps = l; + delete [] p; + return 0; + } + + rho1 = rho; + } + out.close(); + eps = resid / nrmb; + delete [] p; + return 1; +} + + diff --git a/MathLib/LinAlg/Solvers/blas.h b/MathLib/LinAlg/Solvers/blas.h new file mode 100644 index 00000000000..231042bc9e3 --- /dev/null +++ b/MathLib/LinAlg/Solvers/blas.h @@ -0,0 +1,1696 @@ +#ifndef BLAS_H +#define BLAS_H + +#include <assert.h> +#include <stdlib.h> + +#include <cmath> + +#define SIGN(a) ((a) >= 0 ? 1.0 : -1.0) +#define SQR(a) ((a)*(a)) // Quadrat von a +inline double abs2(double a) { + return SQR(a); +} +inline double abs2(float a) { + return SQR(a); +} +#ifndef WIN32 +inline double abs(double a) { + return fabs(a); +} +inline float abs(float a) { + return fabsf(a); +} +#endif +#define MIN(a,b) ((a) <= (b) ? (a) : (b)) +#define MAX(a,b) ((a) >= (b) ? (a) : (b)) + +inline unsigned LTR(unsigned i, unsigned j, unsigned n) +{ + return ((2*n-j-1)*j)/2+i; +} +inline unsigned UTR(unsigned i, unsigned j) +{ + return (j*(j+1))/2+i; +} +inline unsigned GE(unsigned i, unsigned j, unsigned n) +{ + return i+j*n; +} + +const double D_ZERO = 0.0; +const double D_ONE = 1.0; +const double D_MONE = -1.0; +const float S_ZERO = 0.0; +const float S_ONE = 1.0; +const float S_MONE = -1.0; + +const double D_PREC = 1e-16; + +const unsigned N_ONE = 1; +const int N_MONE = -1; +const char JOB_STR[] = "NTOSVULCRA"; + +/* + #ifdef WIN32 + #define dcopy_ dcopy + #define dnrm2_ dnrm2 + #define dgemv_ dgemv + #define dspmv_ dspmv + #define dtpmv_ dtpmv + #define dgemm_ dgemm + #define daxpy_ daxpy + #define ddot_ ddot + #define dscal_ dscal + #define dgesvd_ dgesvd + #define dgeqrf_ dgeqrf + #define dormqr_ dormqr + #define dorgqr_ dorgqr + #define dgetri_ dgetri + #define dgetrf_ dgetrf + #define dsptri_ dsptri + #define dsptrf_ dsptrf + #define dtptrs_ dtptrs + #define dpptrf_ dpptrf + #define scopy_ scopy + #define snrm2_ snrm2 + #define sgemv_ sgemv + #define sspmv_ sspmv + #define stpmv_ stpmv + #define sgemm_ sgemm + #define saxpy_ saxpy + #define sdot_ sdot + #define sscal_ sscal + #define sgesvd_ sgesvd + #define sgeqrf_ sgeqrf + #define sormqr_ sormqr + #define sorgqr_ sorgqr + #define sgetri_ sgetri + #define sgetrf_ sgetrf + #define ssptri_ ssptri + #define ssptrf_ ssptrf + #define stptrs_ stptrs + #define spptrf_ spptrf + #define ccopy_ ccopy + #define scnrm2_ scnrm2 + #define cgemv_ cgemv + #define chpmv_ chpmv + #define ctpmv_ ctpmv + #define cgemm_ cgemm + #define caxpy_ caxpy + #define cdotc_ cdotc + #define cscal_ cscal + #define cgesvd_ cgesvd + #define cgeqrf_ cgeqrf + #define cunmqr_ cunmqr + #define cungqr_ cungqr + #define cgetri_ cgetri + #define cgetrf_ cgetrf + #define csptri_ csptri + #define csptrf_ csptrf + #define ctptrs_ ctptrs + #define cpptrf_ cpptrf + #define zcopy_ zcopy + #define dznrm2_ dznrm2 + #define zgemv_ zgemv + #define zhpmv_ zhpmv + #define ztpmv_ ztpmv + #define zgemm_ zgemm + #define zaxpy_ zaxpy + #define zdotc_ zdotc + #define zscal_ zscal + #define zgesvd_ zgesvd + #define zgeqrf_ zgeqrf + #define zunmqr_ zunmqr + #define zungqr_ zungqr + #define zgetri_ zgetri + #define zgetrf_ zgetrf + #define zsptri_ zsptri + #define zsptrf_ zsptrf + #define ztptrs_ ztptrs + #define zpptrf_ zpptrf + #endif +*/ + + +extern "C" +{ + /******************************************************************/ + //double precision real + /******************************************************************/ + unsigned idamax_(const unsigned*, const double*, const unsigned*); + void dcopy_(const unsigned*, const double*, const unsigned*, + double*, const unsigned*); + void daxpy_(const unsigned*, const double*, const double*, + const unsigned*, double*, const unsigned*); + void dscal_(const unsigned*, const double*, const double*, + const unsigned*); + double ddot_(const unsigned*, const double*, const unsigned*, + const double*, const unsigned*); + double dnrm2_(const unsigned*, const double* const, const unsigned*); + + void dgtsv_(const unsigned*, const unsigned*, const double*, + const double*, const double*, const double*, const unsigned*, + const int*); + void dgemm_(const char*, const char*, const unsigned*, const unsigned*, + const unsigned*, const double*, const double*, const unsigned*, + const double*, const unsigned*, const double*, double*, + const unsigned*); + void dger_(const unsigned*, const unsigned*, const double*, const double*, + const unsigned*, double*, const unsigned*, const double*, + const unsigned*); + void dgemv_(const char*, const unsigned*, const unsigned*, const double*, + const double*, const unsigned*, const double*, const unsigned*, + const double*, double*, const unsigned*); + void dorgqr_(const unsigned*, const unsigned*, const unsigned*, + double*, const unsigned*, double*, double*, + const unsigned*, int*); + void dormqr_(const char*, const char*, const unsigned*, const unsigned*, + const unsigned*, double*, const unsigned*, double*, + double*, const unsigned*, double*, const unsigned*, + int*); + void dsyev_(const char*, const char*, const unsigned*, double*, + const unsigned*, double*, double*, const unsigned*, int*); + void dgeqrf_(const unsigned*, const unsigned*, double*, const unsigned*, + double*, double*, const unsigned*, int*); + void dgeqp3_(const unsigned*, const unsigned*, const double*, + const unsigned*, const unsigned*, const double*, const double*, + const unsigned*, int*); + void dgeqpf_(const unsigned*, const unsigned*, const double*, + const unsigned*, const unsigned*, const double*, const double*, + const unsigned*, int*); + void dgesvd_(const char*, const char*, const unsigned*, const unsigned*, + double*, const unsigned*, double*, double*, const unsigned*, + double*, const unsigned*, double*, const unsigned*, int*); + void dgetrf_(const unsigned*, const unsigned*, double*, const unsigned*, + unsigned*, int*); + void dgetrs_(const char*, const unsigned*, const unsigned*, double*, + const unsigned*, const unsigned*, double*, const unsigned*, + int*); + void dgetri_(const unsigned*, double*, const unsigned*, unsigned*, double*, + const unsigned*, int*); + void dspmv_(const char*, const unsigned*, const double*, + const double*, const double*, const unsigned*, + const double*, double*, const unsigned*); + void dsptrf_(const char*, const unsigned*, const double*, int*, int*); + void dsptri_(const char*, const unsigned*, const double*, int*, double*, int*); + void dpotrf_(const char*, const unsigned*, const double*, const unsigned*, + int*); + void dpotri_(const char*, const unsigned*, const double*, const unsigned*, + int*); + void dpptrf_(const char*, const unsigned*, const double*, int*); + void dpptri_(const char*, const unsigned*, const double*, int*); + void dtptrs_(const char*, const char*, const char*, const unsigned*, + + const unsigned*, double*, const unsigned*, double*, + double*, const unsigned*, double*, const unsigned*, + int*); + void dtpsv_(const char*, const char*, const char*, const unsigned*, + double*, double*, const unsigned*); + void dtrtrs_(const char*, const char*, const char*, const unsigned*, + const unsigned*, const double*, const unsigned*, + double*, const unsigned*, int*); + void dtrsm_(const char*, const char*, const char*, const char*, + const unsigned*, const unsigned*, const double*, const double*, + const unsigned*, double*, const unsigned*); + void dtpmv_(const char*, const char*, const char*, const unsigned*, + const double*, double*, const unsigned*); + void dlacpy_(const char*, const unsigned*, const unsigned*, const double*, + const unsigned*, double*, const unsigned*); + void dlaset_(const char*, const unsigned*, const unsigned*, const double*, + const double*, double*, const unsigned*); + void dtrmm_(const char *, const char *, const char *, const char *, + unsigned *, unsigned *, const double *, double *, unsigned *, + double *, unsigned *); + void dswap_(const unsigned*, double*, const unsigned*, double*, + const unsigned*); + + /******************************************************************/ + //single precision real + /******************************************************************/ + unsigned isamax_(const unsigned*, const float*, const unsigned*); + void scopy_(const unsigned*, const float*, const unsigned*, + float*, const unsigned*); + void saxpy_(const unsigned*, const float*, const float*, + const unsigned*, float*, const unsigned*); + void sscal_(const unsigned*, const float*, const float*, + const unsigned*); + float sdot_(const unsigned*, const float*, const unsigned*, + const float*, const unsigned*); + float snrm2_(const unsigned*, const float* const, const unsigned*); + + void sgtsv_(const unsigned*, const unsigned*, const float*, + const float*, const float*, const float*, const unsigned*, + const int*); + void sgemm_(const char*, const char*, const unsigned*, const unsigned*, + const unsigned*, const float*, const float*, const unsigned*, + const float*, const unsigned*, const float*, float*, + const unsigned*); + void sger_(const unsigned*, const unsigned*, const float*, const float*, + const unsigned*, float*, const unsigned*, const float*, + const unsigned*); + void sgemv_(const char*, const unsigned*, const unsigned*, const float*, + const float*, const unsigned*, const float*, const unsigned*, + const float*, float*, const unsigned*); + void sorgqr_(const unsigned*, const unsigned*, const unsigned*, + float*, const unsigned*, float*, float*, + const unsigned*, int*); + void sormqr_(const char*, const char*, const unsigned*, const unsigned*, + const unsigned*, float*, const unsigned*, float*, + float*, const unsigned*, float*, const unsigned*, + int*); + void ssyev_(const char*, const char*, const unsigned*, float*, + const unsigned*, float*, float*, const unsigned*, int*); + void stpsv_(const char*, const char*, const char*, const unsigned*, + float*, float*, const unsigned*); + void sgeqrf_(const unsigned*, const unsigned*, float*, const unsigned*, + float*, float*, const unsigned*, int*); + void sgesvd_(const char*, const char*, const unsigned*, const unsigned*, + float*, const unsigned*, float*, float*, const unsigned*, + float*, const unsigned*, float*, const unsigned*, int*); + void sgetrf_(const unsigned*, const unsigned*, float*, const unsigned*, + unsigned*, int*); + void sgetri_(const unsigned*, float*, const unsigned*, unsigned*, float*, + const unsigned*, int*); + void sspmv_(const char*, const unsigned*, const float*, + const float*, const float*, const unsigned*, + const float*, float*, const unsigned*); + void strsm_(const char*, const char*, const char*, const char*, + const unsigned*, const unsigned*, const float*, const float*, + const unsigned*, float*, const unsigned*); + void ssptrf_(const char*, const unsigned*, const float*, int*, int*); + void ssptri_(const char*, const unsigned*, const float*, int*, float*, int*); + void spotrf_(const char*, const unsigned*, const float*, const unsigned*, + int*); + void spotri_(const char*, const unsigned*, const float*, const unsigned*, + int*); + void spptrf_(const char*, const unsigned*, const float*, int*); + void spptri_(const char*, const unsigned*, const float*, int*); + void stptrs_(const char*, const char*, const char*, const unsigned*, + const unsigned*, float*, float*, const unsigned*, int*); + void strtrs_(const char*, const char*, const char*, const unsigned*, + const unsigned*, const float*, const unsigned*, + float*, const unsigned*, int*); + void stpmv_(const char*, const char*, const char*, const unsigned*, + const float*, float*, const unsigned*); + void sswap_(const unsigned*, float*, const unsigned*, float*, + const unsigned*); +} + + +namespace blas +{ +// inline void conj(unsigned n, double* v) {} +// inline void conj(unsigned n, float* v) {} + + inline void swap(const unsigned n, double* x, const unsigned incx, + double* y, const unsigned incy ) + { + dswap_(&n, x, &incx, y, &incy); + } + + inline void swap(const unsigned n, float* x, const unsigned incx, + float* y, const unsigned incy ) + { + sswap_(&n, x, &incx, y, &incy); + } + + inline void laset(const unsigned m, const unsigned n, const double a, + const double b, double* A, unsigned ldA) + { + dlaset_(JOB_STR, &m, &n, &a, &b, A, &ldA); + } + inline void lasetu(const unsigned m, const unsigned n, const double a, + const double b, double* A, unsigned ldA) + { + dlaset_(JOB_STR+5, &m, &n, &a, &b, A, &ldA); + } + inline void lasetl(const unsigned m, const unsigned n, const double a, + const double b, double* A, unsigned ldA) + { + dlaset_(JOB_STR+6, &m, &n, &a, &b, A, &ldA); + } + + inline unsigned maxi(const unsigned n, double* const v) + { + return idamax_(&n, v, &N_ONE); + } + inline unsigned maxi(const unsigned n, float* const v) + { + return isamax_(&n, v, &N_ONE); + } + + inline void load(const unsigned n, double e, double* const v) + { + for (unsigned i=0; i<n; ++i) v[i] = e; + } + inline void load(const unsigned n, float e, float* const v) + { + for (unsigned i=0; i<n; ++i) v[i] = e; + } + + inline void setzero(const unsigned n, unsigned* const v) + { + for (unsigned i=0; i<n; ++i) v[i] = 0; + } + inline void setzero(const unsigned n, double* const v) + { + load(n, D_ZERO, v); + } + inline void setzero(const unsigned n, float* const v) + { + load(n, S_ZERO, v); + } + inline double nrm2(const unsigned n, const double* const v) + { + return dnrm2_(&n, v, &N_ONE); + } + inline float nrm2(const unsigned n, const float* const v) + { + return snrm2_(&n, v, &N_ONE); + } + + inline double diff2(const unsigned n, double* x, double* y) + { + double s = 0.0; + for (unsigned i=0; i<n; ++i) s += SQR(x[i]-y[i]); + return sqrt(s); + } + inline float diff2(const unsigned n, float* x, float* y) + { + float s = 0.0; + for (unsigned i=0; i<n; ++i) s += SQR(x[i]-y[i]); + return sqrtf(s); + } + inline void copy(const unsigned n, const double*const orig, double* dest) + { + dcopy_(&n, orig, &N_ONE, dest, &N_ONE); + } + inline void copy(const unsigned n, const float*const orig, float* dest) + { + scopy_(&n, orig, &N_ONE, dest, &N_ONE); + } + + inline void lacpy(const unsigned m, const unsigned n, double* A, + const unsigned ldA, double* B, const unsigned ldB) + { + dlacpy_(JOB_STR, &m, &n, A, &ldA, B, &ldB); + } + inline void lacpyu(const unsigned m, const unsigned n, double* A, + const unsigned ldA, double* B, const unsigned ldB) + { + dlacpy_(JOB_STR+5, &m, &n, A, &ldA, B, &ldB); + } + + + inline void copy(const unsigned n, double* orig, const unsigned inco, + double* dest, const unsigned incd) + { + dcopy_(&n, orig, &inco, dest, &incd); + } + inline void copy(const unsigned n, float* orig, const unsigned inco, + float* dest, const unsigned incd) + { + scopy_(&n, orig, &inco, dest, &incd); + } + + // Conv.Copy double2float + inline void copy(const unsigned n, double* orig, float* dest) + { + for (unsigned i=0; i<n; i++) dest[i] = (float) orig[i]; + } + + // Conv.Copy float2double + inline void copy(const unsigned n, float* orig, double* dest) + { + for (unsigned i=0; i<n; i++) dest[i] = (double) orig[i]; + } + + // Scalar product conj(x)*y + inline double scpr(const unsigned n, const double* const v1, + const double* const v2) + { + return ddot_(&n, v1, &N_ONE, v2, &N_ONE); + } + inline float scpr(const unsigned n, const float* const v1, + const float* const v2) + { + return sdot_(&n, v1, &N_ONE, v2, &N_ONE); + } + + inline double sqrsum(const unsigned n, double* const v) + { + return ddot_(&n, v, &N_ONE, v, &N_ONE); + } + + inline float sqrsum(const unsigned n, float* const v) + { + return sdot_(&n, v, &N_ONE, v, &N_ONE); + } + + inline void add(const unsigned n, double* const x, double* const y) + { + daxpy_(&n, &D_ONE, x, &N_ONE, y, &N_ONE); + } + inline void add(const unsigned n, float* const x, float* const y) + { + saxpy_(&n, &S_ONE, x, &N_ONE, y, &N_ONE); + } + + inline void axpy(const unsigned n, const double d, const double* const x, + double* const y) + { + daxpy_(&n, &d, x, &N_ONE, y, &N_ONE); + } + inline void axpy(const unsigned n, const float d, const float* const x, + float* const y) + { + saxpy_(&n, &d, x, &N_ONE, y, &N_ONE); + } + + inline void scal(const unsigned n, const double d, double* const x, unsigned incx) + { dscal_(&n, &d, x, &incx); } + inline void scal(const unsigned n, const float d, float* const x, unsigned incx) + { sscal_(&n, &d, x, &incx); } + + template<class T> + inline void scal(const unsigned n, const T d, T* const x) + { scal(n, d, x, N_ONE); } + + template<class T> inline void normalize(unsigned n, T* x) + { + T s = 1.0/blas::nrm2(n, x); + blas::scal(n, s, x); + } + + template<class T> inline void mkOrth(unsigned n, const T* v1, T* v2) + { + T s = -blas::scpr(n, v1, v2); + blas::axpy(n, s, v1, v2); + } + + template<class T> + inline void scal(const unsigned n, const T d, T* const x, T* const y) + { + for (unsigned i=0; i<n; ++i) y[i] = d * x[i]; + } + + + + + // y = d Ax + inline void gemv(const unsigned m, const unsigned n, double d, + const double* A, double *x, double *y) + { + dgemv_(JOB_STR, &m, &n, &d, A, &m, x, &N_ONE, &D_ZERO, y, &N_ONE); + } + inline void gemv(const unsigned m, const unsigned n, float d, const float* A, + float *x, float *y) + { + sgemv_(JOB_STR, &m, &n, &d, A, &m, x, &N_ONE, &S_ZERO, y, &N_ONE); + } + + // y += d Ax + inline void gemva(const unsigned m, const unsigned n, double d, const double* A, + const double *x, double *y) + { + dgemv_(JOB_STR, &m, &n, &d, A, &m, x, &N_ONE, &D_ONE, y, &N_ONE); + } + inline void gemva(const unsigned m, const unsigned n, float d, const float* A, + const float *x, float *y) + { + sgemv_(JOB_STR, &m, &n, &d, A, &m, x, &N_ONE, &S_ONE, y, &N_ONE); + } + + // y = d A^H x + inline void gemhv(const unsigned m, const unsigned n, double d, const double* A, + const double *x, double *y) + { + dgemv_(JOB_STR+1, &m, &n, &d, A, &m, x, &N_ONE, &D_ZERO, y, &N_ONE); + } + inline void gemhv(const unsigned m, const unsigned n, float d, const float* A, + const float *x, float *y) + { + sgemv_(JOB_STR+1, &m, &n, &d, A, &m, x, &N_ONE, &S_ZERO, y, &N_ONE); + } + + // y += d A^H x + inline void gemhva(const unsigned m, const unsigned n, double d, + const double* A, unsigned ldA, const double *x, unsigned incx, + double *y, unsigned incy) + { + dgemv_(JOB_STR+1, &m, &n, &d, A, &ldA, x, &incx, &D_ONE, y, &incy); + } + inline void gemhva(const unsigned m, const unsigned n, float d, + const float* A, unsigned ldA, const float *x, unsigned incx, + float *y, unsigned incy) + { + sgemv_(JOB_STR+1, &m, &n, &d, A, &ldA, x, &incx, &S_ONE, y, &incy); + } + + inline void gemhva(const unsigned m, const unsigned n, double d, const double* A, + const double *x, double *y) + { gemhva(m, n, d, A, m, x, N_ONE, y, N_ONE); } + inline void gemhva(const unsigned m, const unsigned n, float d, const float* A, + const float *x, float *y) + { gemhva(m, n, d, A, m, x, N_ONE, y, N_ONE); } + + // y += d A x (A symm. dense in packed format) + inline void gemsva(const unsigned n, double d, double* A, + double *x, double *y) + { + dspmv_(JOB_STR+5, &n, &d, A, x, &N_ONE, &D_ONE, y, &N_ONE); + } + inline void gemsva(const unsigned n, float d, float* A, + float *x, float *y) + { + sspmv_(JOB_STR+5, &n, &d, A, x, &N_ONE, &S_ONE, y, &N_ONE); + } + + // sovles Ax=B, A is a triangluar Matrix + inline void gtsv(const unsigned* n, const double* DiagLower, + const double* Diag, const double* DiagUpper, + const double* B, const int* INFO) + { + dgtsv_(n, &N_ONE, DiagLower, Diag, DiagUpper, B, n, INFO); + } + inline void gtsv(const unsigned* n, const float* DiagLower, + const float* Diag, const float* DiagUpper, + const float* B, const int* INFO) + { + sgtsv_(n, &N_ONE, DiagLower, Diag, DiagUpper, B, n, INFO); + } + + // C = d A B, A is m x p, B is p x n + inline void gemm(const unsigned m, const unsigned p, const unsigned n, + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) + { + dgemm_(JOB_STR, JOB_STR, &m, &n, &p, &d, A, &ldA, B, &ldB, + &D_ZERO, C, &ldC); + } + inline void gemm(const unsigned m, const unsigned p, const unsigned n, + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) + { + sgemm_(JOB_STR, JOB_STR, &m, &n, &p, &d, A, &ldA, B, &ldB, + &S_ZERO, C, &ldC); + } + + // C += d A B, A is m x p, B is p x n + inline void gemma(const unsigned m, const unsigned p, const unsigned n, + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) + { + dgemm_(JOB_STR, JOB_STR, &m, &n, &p, &d, A, &ldA, B, &ldB, + &D_ONE, C, &ldC); + } + inline void gemma(const unsigned m, const unsigned p, const unsigned n, + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) + { + sgemm_(JOB_STR, JOB_STR, &m, &n, &p, &d, A, &ldA, B, &ldB, + &S_ONE, C, &ldC); + } + + // C = d A^H B, A is m x p, B is m x n + inline void gemhm(const unsigned m, const unsigned p, const unsigned n, + const double d, const double* A, const unsigned ldA, + const double *B, const unsigned ldB, + double* C, const unsigned ldC) + { + dgemm_(JOB_STR+1, JOB_STR, &p, &n, &m, &d, A, &ldA, B, &ldB, + &D_ZERO, C, &ldC); + } + inline void gemhm(const unsigned m, const unsigned p, const unsigned n, + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) + { + sgemm_(JOB_STR+1, JOB_STR, &p, &n, &m, &d, A, &ldA, B, &ldB, + &S_ZERO, C, &ldC); + } + + // C += d A^H B, A is m x p, B is m x n + inline void gemhma(unsigned m, unsigned p, unsigned n, double d, + const double* const A, const unsigned ldA, const double* const B, + const unsigned ldB, double* C, unsigned ldC) + { + dgemm_(JOB_STR+1, JOB_STR, &p, &n, &m, &d, A, &ldA, B, &ldB, + &D_ONE, C, &ldC); + } + inline void gemhma(unsigned m, unsigned p, unsigned n, float d, + const float* const A, const unsigned ldA, const float* const B, + const unsigned ldB, float* C, unsigned ldC) + { + sgemm_(JOB_STR+1, JOB_STR, &p, &n, &m, &d, A, &ldA, B, &ldB, + &S_ONE, C, &ldC); + } + + // C = d A B^H, A is m x p, B is n x p + inline void gemmh(const unsigned m, const unsigned p, const unsigned n, + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) + { + dgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, + &D_ZERO, C, &ldC); + } + inline void gemmh(const unsigned m, const unsigned p, const unsigned n, + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) + { + sgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, + &S_ZERO, C, &ldC); + } + + // C += d A B^H, A is m x p, B is n x p + inline void gemmha(const unsigned m, const unsigned p, const unsigned n, + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) + { + dgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, + &D_ONE, C, &ldC); + } + inline void gemmha(const unsigned m, const unsigned p, const unsigned n, + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) + { + sgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, + &S_ONE, C, &ldC); + } + inline void gemmha(const unsigned m, const unsigned p, const unsigned n, + const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) + { + dgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &D_ONE, A, &ldA, B, &ldB, + &D_ONE, C, &ldC); + } + inline void gemmha(const unsigned m, const unsigned p, const unsigned n, + const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) + { + sgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &S_ONE, A, &ldA, B, &ldB, + &S_ONE, C, &ldC); + } + + // C = d A^H B^H, A is p x m, B is n x p + inline void gemhmh(const unsigned m, const unsigned p, const unsigned n, + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) + { + dgemm_(JOB_STR+1, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, + &D_ZERO, C, &ldC); + } + inline void gemhmh(const unsigned m, const unsigned p, const unsigned n, + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) + { + sgemm_(JOB_STR+1, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, + &S_ZERO, C, &ldC); + } + + //C += d*AB, A is mxm (packed upper half is stored), B is mxn and regular matrix + inline void sygemma(const unsigned m, const unsigned n, + const double* const A, const double* const B, + const double d, double* const C) + { + for(unsigned i=0;i<m;i++){ + for(unsigned j=0;j<n;j++){ + for(unsigned k=i;k<m;k++){ + if(i==k){ + C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; + }else{ + C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; + C[j*m+k] += d*A[i+k*(k+1)/2]*B[i+j*m]; + } + } + } + } + } + inline void sygemma(const unsigned m, const unsigned n, + const float* const A, const float* const B, + const float d, float* const C) + { + for(unsigned i=0;i<m;i++){ + for(unsigned j=0;j<n;j++){ + for(unsigned k=i;k<m;k++){ + if(i==k){ + C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; + }else{ + C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; + C[j*m+k] += d*A[i+k*(k+1)/2]*B[i+j*m]; + } + } + } + } + } + + //C += d*AB, A is mxn and regular matrix, B is nxn (packed upper half is stored) + inline void gesymma(const unsigned m, const unsigned n, + const double* const A, const double* const B, + const double d, double* const C) + { + for(unsigned i=0;i<m;i++){ + for(unsigned j=0;j<n;j++){ + for(unsigned k=j;k<n;k++){ + if(j==k) + C[j*m+i] += d*A[i+k*m]*B[k+j*(j+1)/2]; + else{ + C[j*m+i] += d*A[i+k*m]*B[j+k*(k+1)/2]; + C[k*m+i] += d*A[i+j*m]*B[j+k*(k+1)/2]; + } + } + } + } + } + inline void gesymma(const unsigned m, const unsigned n, + const float* const A, const float* const B, + const float d, float* const C) + { + for(unsigned i=0;i<m;i++){ + for(unsigned j=0;j<n;j++){ + for(unsigned k=j;k<n;k++){ + if(j==k) + C[j*m+i] += d*A[i+k*m]*B[k+j*(j+1)/2]; + else{ + C[j*m+i] += d*A[i+k*m]*B[j+k*(k+1)/2]; + C[k*m+i] += d*A[i+j*m]*B[j+k*(k+1)/2]; + } + } + } + } + } + + // C += d A^H A, C is a symm. matrix (packed upper half is stored), A is mxn + inline void symhm(const unsigned m, const unsigned n, const double* const A, + const double d, double* C) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned i=0; i<=j; ++i) { + double sum = 0.0; + for (unsigned k=0; k<m; ++k) sum += A[k+i*m] * A[k+j*m]; + C[i+j*(j+1)/2] += d * sum; + } + } + } + inline void symhm(const unsigned m, const unsigned n, const float* const A, + const float d, float* C) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned i=0; i<=j; ++i) { + float sum = 0.0; + for (unsigned k=0; k<m; ++k) sum += A[k+i*m] * A[k+j*m]; + C[i+j*(j+1)/2] += d * sum; + } + } + } + + // C += d A A^H, C is a symm. matrix (packed upper half is stored), A is mxn + inline void symmh(unsigned m, unsigned n, double* A, double d, double* C) + { + for (unsigned k=0; k<n; ++k) { + for (unsigned j=0; j<=n; ++j) { + double e = d * A[j+k*m]; + for (unsigned i=0; i<j; ++i) C[i+j*(j+1)/2] += e * A[i+k*m]; + } + } + } + inline void symmh(unsigned m, unsigned n, float* A, float d, float* C) + { + for (unsigned k=0; k<n; ++k) { + for (unsigned j=0; j<n; ++j) { + float e = d * A[j+k*m]; + for (unsigned i=0; i<=j; ++i) C[i+j*(j+1)/2] += e * A[i+k*m]; + } + } + } + + // Singular Value Decomposition + inline int gesvdS(unsigned m, unsigned n, double* A, double* S, + double* U, unsigned ldU, double* VT, unsigned ldVT, + unsigned nwk, double* wk) + { + int INF; + dgesvd_(JOB_STR+3, JOB_STR+3, &m, &n, A, &m, S, U, &ldU, VT, &ldVT, + wk, &nwk, &INF); + return INF; + } + inline int gesvd(unsigned m, unsigned n, double* A, double* S, + double* VT, unsigned ldVT, unsigned nwk, double* wk) + { + int INF; + dgesvd_(JOB_STR+2, JOB_STR+3, &m, &n, A, &m, S, A, &m, VT, &ldVT, + wk, &nwk, &INF); + return INF; + } + inline int gesvd(unsigned m, unsigned n, float* A, float* S, + float* VT, unsigned ldVT, unsigned nwk, float* wk) + { + int INF; + sgesvd_(JOB_STR+2, JOB_STR+3, &m, &n, A, &m, S, A, &m, VT, &ldVT, + wk, &nwk, &INF); + return INF; + } + inline int gesvd(unsigned m, unsigned n, float* A, double* S, + float* VT, unsigned ldVT, unsigned nwk, float* wk) + { + int INF; + // workaround (needs to be improved) + float* Sf = new float[MIN(m,n)]; + for(unsigned i=0;i<MIN(m,n);i++) + Sf[i] = (float)S[i]; + sgesvd_(JOB_STR+2, JOB_STR+3, &m, &n, A, &m, Sf, A, &m, VT, &ldVT, + wk, &nwk, &INF); + delete [] Sf; + return INF; + } + inline int gesvd(unsigned m, unsigned n, double* A, double* S, + double* U, unsigned ldU, double* VT, unsigned ldVT, + unsigned nwk, double* wk) + { + int INF; + dgesvd_(JOB_STR+9, JOB_STR+9, &m, &n, A, &m, S, U, &ldU, VT, &ldVT, + wk, &nwk, &INF); + return INF; + } + inline int gesvd(unsigned m, unsigned n, float* A, double* S, + float* U, unsigned ldU, float* VT, unsigned ldVT, + unsigned nwk, float* wk) + { + int INF; + // workaround (needs to be improved) + float* Sf = new float[MIN(m,n)]; + for(unsigned i=0;i<MIN(m,n);i++) + Sf[i] = (float)S[i]; + sgesvd_(JOB_STR+9, JOB_STR+9, &m, &n, A, &m, Sf, U, &ldU, VT, &ldVT, + wk, &nwk, &INF); + delete [] Sf; + return INF; + } + + // compute singular values + inline int svals(unsigned m, unsigned n, double* A, double* S, + unsigned nwk, double* wk) + { + int INF; + dgesvd_(JOB_STR, JOB_STR, &m, &n, A, &m, S, A, &m, A, &n, wk, &nwk, &INF); + return INF; + } + inline int svals(unsigned m, unsigned n, float* A, float* S, + unsigned nwk, float* wk) + { + int INF; + sgesvd_(JOB_STR, JOB_STR, &m, &n, A, &m, S, A, &m, A, &n, wk, &nwk, &INF); + return INF; + } + + // triangular factorisation + inline int getrf(const unsigned n, double* A, unsigned* ipiv) + { + int INF; + dgetrf_(&n, &n, A, &n, ipiv, &INF); + return INF; + } + inline int getrf(const unsigned n, float* A, unsigned* ipiv) + { + int INF; + sgetrf_(&n, &n, A, &n, ipiv, &INF); + return INF; + } + + // upper triangular packed MV + inline void utrpv(const unsigned n, double* A, double* x) + { + dtpmv_(JOB_STR+5, JOB_STR, JOB_STR, &n, A, x, &N_ONE); + } + inline void utrpv(const unsigned n, float* A, float* x) + { + stpmv_(JOB_STR+5, JOB_STR, JOB_STR, &n, A, x, &N_ONE); + } + + // lower triangular packed transpose MV + inline void ltrphv(const unsigned n, double* A, double* x) + { + dtpmv_(JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, A, x, &N_ONE); + } + inline void ltrphv(const unsigned n, float* A, float* x) + { + stpmv_(JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, A, x, &N_ONE); + } + + // QR factorisation + inline int geqrf(const unsigned m, const unsigned n, double* A, + double* tau, unsigned nwk, double* wk) + { + int INF; + dgeqrf_(&m, &n, A, &m, tau, wk, &nwk, &INF); + return INF; + } + inline int geqrf(const unsigned m, const unsigned n, float* A, + float* tau, unsigned nwk, float* wk) + { + int INF; + sgeqrf_(&m, &n, A, &m, tau, wk, &nwk, &INF); + return INF; + } + inline int geqrf(const unsigned m, const unsigned n, double* A, + const unsigned ldA, double* tau, unsigned nwk, double* wk) + { + int INF; + dgeqrf_(&m, &n, A, &ldA, tau, wk, &nwk, &INF); + return INF; + } + inline int geqrf(const unsigned m, const unsigned n, float* A, + const unsigned ldA, float* tau, unsigned nwk, float* wk) + { + int INF; + sgeqrf_(&m, &n, A, &ldA, tau, wk, &nwk, &INF); + return INF; + } + + // Multiply a general Matrix with the Q-Matrix (QR factorization), Q C + inline int ormqr(const unsigned m, const unsigned n, const unsigned p, + double* A, double* tau, double* C, + unsigned nwk, double* wk) + { + int INF; + dormqr_(JOB_STR+6, JOB_STR, &m, &n, &p, A, &m, tau, C, &m, wk, &nwk, &INF); + return INF; + } + inline int ormqr(const unsigned m, const unsigned n, const unsigned p, + float* A, float* tau, float* C, + unsigned nwk, float* wk) + { + int INF; + sormqr_(JOB_STR+6, JOB_STR, &m, &n, &p, A, &m, tau, C, &m, wk, &nwk, &INF); + return INF; + } + inline int ormqr(const unsigned m, const unsigned n, const unsigned p, + double* A, const unsigned ldA, double* tau, double* C, + const unsigned ldC, unsigned nwk, double* wk) + { + int INF; + dormqr_(JOB_STR+6, JOB_STR, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, + &INF); + return INF; + } + inline int ormqr(const unsigned m, const unsigned n, const unsigned p, + float* A, const unsigned ldA, float* tau, float* C, + const unsigned ldC, unsigned nwk, float* wk) + { + int INF; + sormqr_(JOB_STR+6, JOB_STR, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, + &INF); + return INF; + } + + // Q^H C + inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, + double* A, const unsigned ldA, double* tau, double* C, + const unsigned ldC, unsigned nwk, double* wk) + { + int INF; + dormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, + &INF); + return INF; + } + inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, + float* A, const unsigned ldA, float* tau, float* C, + const unsigned ldC, unsigned nwk, float* wk) + { + int INF; + sormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, + &INF); + return INF; + } + + inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, + double* A, const unsigned ldA, double* tau, double* C, + unsigned nwk, double* wk) + { + int INF; + dormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &ldA, tau, C, &m, wk, &nwk, + &INF); + return INF; + } + inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, + float* A, const unsigned ldA, float* tau, float* C, + unsigned nwk, float* wk) + { + int INF; + sormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &ldA, tau, C, &m, wk, &nwk, + &INF); + return INF; + } + + inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, + double* A, double* tau, double* C, + unsigned nwk, double* wk) + { + int INF; + dormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &m, tau, C, &m, wk, &nwk, &INF); + return INF; + } + inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, + float* A, float* tau, float* C, + unsigned nwk, float* wk) + { + int INF; + sormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &m, tau, C, &m, wk, &nwk, &INF); + return INF; + } + inline int morqr(const unsigned m, const unsigned n, const unsigned p, + double* A, const unsigned ldA, double* tau, double* C, + const unsigned ldC, unsigned nwk, double* wk) + { + int INF; + dormqr_(JOB_STR+8, JOB_STR, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, + &INF); + return INF; + } + inline int morqr(const unsigned m, const unsigned n, const unsigned p, + float* A, const unsigned ldA, float* tau, float* C, + const unsigned ldC, unsigned nwk, float* wk) + { + int INF; + sormqr_(JOB_STR+8, JOB_STR, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, + &INF); + return INF; + } + + inline void ger(unsigned M, unsigned N, double d, double* X, unsigned INCX, + double* y, unsigned INCY, double* A, unsigned LDA) + { + dger_(&M, &N, &d, X, &INCX, y, &INCY, A, &LDA); + } + + inline void ger(unsigned M, unsigned N, float d, float* X, unsigned INCX, + float* y, unsigned INCY, float* A, unsigned LDA) + { + sger_(&M, &N, &d, X, &INCX, y, &INCY, A, &LDA); + } + + // return Q-Matrix (QR factorization) in A + inline int orgqr(const unsigned m, const unsigned n, double* A, double* tau, + unsigned nwk, double* wk) + { + int INF; + dorgqr_(&m, &n, &n, A, &m, tau, wk, &nwk, &INF); + return INF; + } + inline int orgqr(const unsigned m, const unsigned n, float* A, float* tau, + unsigned nwk, float* wk) + { + int INF; + sorgqr_(&m, &n, &n, A, &m, tau, wk, &nwk, &INF); + return INF; + } + + // B=A^H, A in R^(m,n) + inline void transpose(unsigned m, unsigned n, double* A, double* B) + { + for (unsigned i=0; i<m; ++i) dcopy_(&n, A+i, &m, B+i*n, &N_ONE); + } + inline void transpose(unsigned m, unsigned n, float* A, float* B) + { + for (unsigned i=0; i<m; ++i) scopy_(&n, A+i, &m, B+i*n, &N_ONE); + } + + // product of upper triangular and regular Matrix M = R A, R mxp, A nxp + inline void utrgemmh(const unsigned m, const unsigned p, const unsigned n, + const double* const R, const unsigned ldR, + const double* const A, const unsigned ldA, + double* const M, const unsigned ldM) + { + for (unsigned i=0; i<m; ++i) { + for (unsigned j=0; j<n; ++j) { + double d = D_ZERO; + for(unsigned l=i; l<p; ++l) + d += R[i+l*ldR]*A[j+l*ldA]; + M[i+j*ldM] = d; + } + } + } + inline void utrgemmh(const unsigned m, const unsigned p, const unsigned n, + const float* const R, const unsigned ldR, + const float* const A, const unsigned ldA, + float* const M, const unsigned ldM) + { + for (unsigned i=0; i<m; ++i) { + for (unsigned j=0; j<n; ++j) { + float d = S_ZERO; + for(unsigned l=i; l<p; ++l) + d += R[i+l*ldR]*A[j+l*ldA]; + M[i+j*ldM] = d; + } + } + } + + // product of two upper triangular matrices M = R1 R2^T, R1 mxp, R2 nxp + inline void utrmmh(const unsigned m, const unsigned p, const unsigned n, + const double* const R1, const unsigned ldR1, + const double* const R2, const unsigned ldR2, + double* const M) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned i=0; i<m; ++i) { + double d = D_ZERO; + unsigned ij = MAX(i,j); + for (unsigned l=ij; l<p; ++l) + d += R1[i+l*ldR1]*R2[j+l*ldR2]; + M[i+j*m] = d; + } + } + } + inline void utrmmh(const unsigned m, const unsigned p, const unsigned n, + const float* const R1, const unsigned ldR1, + const float* const R2, const unsigned ldR2, + float* const M) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned i=0; i<m; ++i) { + float d = S_ZERO; + unsigned ij = MAX(i,j); + for (unsigned l=ij; l<p; ++l) + d += R1[i+l*ldR1]*R2[j+l*ldR2]; + M[i+j*m] = d; + } + } + } + // product of two upper triangular matrices M += R1 R2^T, R1 mxp, R2 nxp + inline void utrmmha(const unsigned m, const unsigned p, const unsigned n, + const double* const R1, const unsigned ldR1, + const double* const R2, const unsigned ldR2, + double* const M) + { + for(unsigned l=0; l<p; l++){ + for(unsigned j=p; j<n; j++){ + for(unsigned i=p; i<m; i++){ + M[i+j*m] += R1[i+l*ldR1]*R2[j+l*ldR2]; + } + } + } + } + inline void utrmmha(const unsigned m, const unsigned p, const unsigned n, + const float* const R1, const unsigned ldR1, + const float* const R2, const unsigned ldR2, + float* const M) + { + for(unsigned l=0; l<p; l++){ + for(unsigned j=p; j<n; j++){ + for(unsigned i=p; i<m; i++){ + M[i+j*m] += R1[i+l*ldR1]*R2[j+l*ldR2]; + } + } + } + } + + // product of an upper triangular matrix U and a matrix A, A:=U A + inline void utrgemm(unsigned m, unsigned n, double* U, unsigned ldU, + double* A, unsigned ldA) + { + dtrmm_(JOB_STR+6, JOB_STR+5, JOB_STR, JOB_STR, &m, &n, &D_ONE, U, &ldU, + A, &ldA); + } + + // A:=A U^H + inline void geutrmm(unsigned m, unsigned n, double* A, unsigned ldA, + double* U, unsigned ldU) + { + dtrmm_(JOB_STR+8, JOB_STR+5, JOB_STR+1, JOB_STR, &m, &n, &D_ONE, U, &ldU, + A, &ldA); + } + + // C += d A U^T, U upper triangular matrix in packed storage + inline void geputrmmh(double d, unsigned m, unsigned n, double* A, + double* U, double* C) + { + for (unsigned j=0; j<n; ++j) + for (unsigned l=j; l<n; ++l) { + double e = d*U[j+l*(l+1)/2]; + for (unsigned i=0; i<m; ++i) C[i+j*m] += e*A[i+l*m]; + } + } + + inline void geputrmmh(float d, unsigned m, unsigned n, float* A, + float* U, float* C) + { + for (unsigned j=0; j<n; ++j) + for (unsigned l=j; l<n; ++l) { + float e = d*U[j+l*(l+1)/2]; + for (unsigned i=0; i<m; ++i) C[i+j*m] += e*A[i+l*m]; + } + } + + // C += A U^T, U upper triangular matrix + inline void geutrTmm(unsigned m, unsigned n, double* U, unsigned ldU, + double* A, unsigned ldA) + { + dtrmm_(JOB_STR+8, JOB_STR+5, JOB_STR+1, JOB_STR, &m, &n, &D_ONE, U, &ldU, + A, &ldA); + } + + // product of two upper triangular matrices M = R1 R2^T, R1 mxp, R2 nxp + inline void utrmmh(unsigned m, unsigned p, unsigned n, double* R1, + unsigned ldR1, double* R2, unsigned ldR2, double* M) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned i=0; i<m; ++i) { + double d = D_ZERO; + for (unsigned l=MAX(i,j); l<p; ++l) d += R1[i+l*ldR1] * R2[j+l*ldR2]; + M[i+j*m] = d; + } + } + } + inline void utrmmh(unsigned m, unsigned p, unsigned n, float* R1, + unsigned ldR1, float* R2, unsigned ldR2, float* M) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned i=0; i<m; ++i) { + float d = S_ZERO; + for (unsigned l=MAX(i,j); l<p; ++l) d += R1[i+l*ldR1] * R2[j+l*ldR2]; + M[i+j*m] = d; + } + } + } + + // C += d U A, where U is upper triangular in packed storage + template<class T> + inline void putrgemm(T d, unsigned n, T* U, unsigned p, T* A, T* C) + { + for (unsigned j=0; j<p; ++j) { + for (unsigned l=0; l<n; ++l) { + T e = d * A[l+j*n]; + unsigned idl = l*(l+1)/2; + for (unsigned i=0; i<=l; ++i) C[i+j*n] += e * U[i+idl]; + } + } + } + + // A += d U L, where U is upper triangular in packed storage + // and L is lower triangular in packed storage + inline void putrltrmm(double d, unsigned n, double* U, double* L, + double* A) + { + unsigned* ip = new unsigned[n]; + for(unsigned i=0;i<n;i++) + ip[i]=L[((2*n-i+1)*i)/2]; + + for(unsigned j=0;j<n;j++){ + const unsigned ipj = ip[j]; + const unsigned idUj = (ipj*(ipj+1))/2; + unsigned idL = ((2*n-j+1)*j)/2; + for(unsigned i=0;i<=ipj;i++) + A[i]+=d*U[idUj+i]; + for(unsigned k=j+1;k<n;k++){ + const unsigned ipk = ip[k]; + const double t = d*L[++idL]; + const unsigned idUk = (ipk*(ipk+1))/2; + for(unsigned i=0;i<=ipk;i++) + A[i]+=U[idUk+i]*t; + } + A+=n; + } + delete [] ip; + } + + inline void putrltrmm(float d, unsigned n, float* U, float* L, + float* A) + { + unsigned* ip = new unsigned[n]; + for(unsigned i=0;i<n;i++) + ip[i]=L[((2*n-i+1)*i)/2]; + + for(unsigned j=0;j<n;j++){ + const unsigned ipj = ip[j]; + const unsigned idUj = (ipj*(ipj+1))/2; + unsigned idL = ((2*n-j+1)*j)/2; + for(unsigned i=0;i<=ipj;i++) + A[i]+=d*U[idUj+i]; + for(unsigned k=j+1;k<n;k++){ + const unsigned ipk = ip[k]; + const float t = d*L[++idL]; + const unsigned idUk = (ipk*(ipk+1))/2; + for(unsigned i=0;i<=ipk;i++) + A[i]+=U[idUk+i]*t; + } + A+=n; + } + delete [] ip; + } + + // A += d U U^H, where U is upper triangular in packed storage + // only the upper triangular part of A is computed + inline void putrmmh(double d, unsigned n, double* U, double* A) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned l=j; l<n; ++l) { + unsigned idl = l*(l+1)/2; + double e = d * U[j+idl]; + for (unsigned i=0; i<=j; ++i) A[i+j*(j+1)/2] += e * U[i+idl]; + } + } + } + + inline void putrmmh(float d, unsigned n, float* U, float* A) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned l=j; l<n; ++l) { + unsigned idl = l*(l+1)/2; + float e = d * U[j+idl]; + for (unsigned i=0; i<=j; ++i) A[i+j*(j+1)/2] += e * U[i+idl]; + } + } + } + + inline void fill0_ltr(unsigned n, double* A) + { + for (unsigned j=0; j<n; ++j) { + *A++ = (double) j; + for (unsigned i=j+1; i<n; ++i) *A++ = D_ZERO; + } + } + inline void fill0_ltr(unsigned n, float* A) + { + for (unsigned j=0; j<n; ++j) { + *A++ = (float) j; + for (unsigned i=j+1; i<n; ++i) *A++ = S_ZERO; + } + } + + // fill nxn matrix A with identity + inline void fillId(unsigned n, double *A) + { + for (unsigned j=0; j<n; ++j) { + unsigned i = 0; + for (; i<j; ++i) *A++ = D_ZERO; + *A++ = D_ONE; + ++i; + for (; i<n; ++i) *A++ = D_ZERO; + } + } + inline void fillId(unsigned n, float *A) + { + for (unsigned j=0; j<n; ++j) { + unsigned i = 0; + for (; i<j; ++i) *A++ = S_ZERO; + *A++ = S_ONE; + ++i; + for (; i<n; ++i) *A++ = S_ZERO; + } + } + + // fill nxn upper triang. matrix A with identity (packed storage) + inline void fillId_utr(unsigned n, double *A) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned i=0; i<j; ++i) *A++ = D_ZERO; + *A++ = D_ONE; + } + } + inline void fillId_utr(unsigned n, float *A) + { + for (unsigned j=0; j<n; ++j) { + for (unsigned i=0; i<j; ++i) *A++ = S_ZERO; + *A++ = S_ONE; + } + } + + // fill nxn normalized lower triang. matrix A with identity (packed storage) + inline void fillId_ltr(unsigned n, double *A) + { + for (unsigned i=0; i<n; ++i) { + for (unsigned j=0; j<i; ++j) *A++ = D_ZERO; + // for pivoting, a ltr is assumed to have ones on the diagonal + *A++ = (double) i; + } + } + inline void fillId_ltr(unsigned n, float *A) + { + for (unsigned i=0; i<n; ++i) { + for (unsigned j=0; j<i; ++j) *A++ = S_ZERO; + // for pivoting, a ltr is assumed to have ones on the diagonal + *A++ = (float) i; + } + } + +} + + +namespace lapack +{ + + // general inversion + inline void geinv(const unsigned n, double* const A) + { + unsigned* const ipiv = new unsigned[n]; + assert(ipiv!=NULL); + + const unsigned lwork = 4*n; + double* const work = new double[lwork]; + assert(work!=NULL); + + int INFO = blas::getrf(n, A, ipiv); + assert(INFO==0); + + dgetri_(&n, A, &n, ipiv, work, &lwork, &INFO); + assert(INFO==0); + + delete [] work; + delete [] ipiv; + } + inline void geinv(const unsigned n, float* const A) + { + unsigned* const ipiv = new unsigned[n]; + assert(ipiv!=NULL); + + const unsigned lwork = 4*n; + float* const work = new float[lwork]; + assert(work!=NULL); + + int INFO = blas::getrf(n, A, ipiv); + assert(INFO==0); + + sgetri_(&n, A, &n, ipiv, work, &lwork, &INFO); + assert(INFO==0); + + delete [] work; + delete [] ipiv; + } + + inline void geinv_sym(const unsigned n, double* const A) + { + int* const ipiv = new int[n]; + assert(ipiv!=NULL); + + const unsigned lwork = 4*n; + double* const work = new double[lwork]; + assert(work!=NULL); + + int INFO; + dsptrf_(JOB_STR+5, &n, A, ipiv, &INFO); + assert(INFO==0); + + dsptri_(JOB_STR+5, &n, A, ipiv, work, &INFO); + assert(INFO==0); + + delete [] work; + delete [] ipiv; + } + inline void geinv_sym(const unsigned n, float* const A) + { + int* const ipiv = new int[n]; + assert(ipiv!=NULL); + + const unsigned lwork = 4*n; + float* const work = new float[lwork]; + assert(work!=NULL); + + int INFO; + ssptrf_(JOB_STR+5, &n, A, ipiv, &INFO); + assert(INFO==0); + + ssptri_(JOB_STR+5, &n, A, ipiv, work, &INFO); + assert(INFO==0); + + delete [] work; + delete [] ipiv; + } + + // packed triangular factorisation of positive definite matrix + inline int pptrf(const unsigned n, double* A) + { + int inf; + dpptrf_(JOB_STR+5, &n, A, &inf); + return inf; + } + inline int pptrf(const unsigned n, float* A) + { + int inf; + spptrf_(JOB_STR+5, &n, A, &inf); + return inf; + } + + // packed triangular factorization of hermitian matrix + inline int sptrf(const unsigned n, double* A, int* ipiv) + { + int inf; + dsptrf_(JOB_STR+5, &n, A, ipiv, &inf); + return inf; + } + inline int sptrf(const unsigned n, float* A, int* ipiv) + { + int inf; + ssptrf_(JOB_STR+5, &n, A, ipiv, &inf); + return inf; + } + + // lower triangular solve + inline void ltrs(const unsigned n, double* A, + const unsigned p, double* B, const unsigned ldB) + { + // dtptrs_(JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, A, B, &ldB, &inf); + for (unsigned i=0; i<p; ++i) + dtpsv_(JOB_STR+6, JOB_STR, JOB_STR+5, &n, A, B+i*ldB, &N_ONE); + } + inline void ltrs(const unsigned n, float* A, + const unsigned p, float* B, const unsigned ldB) + { + // stptrs_(JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, A, B, &ldB, &inf); + for (unsigned i=0; i<p; ++i) + stpsv_(JOB_STR+6, JOB_STR, JOB_STR+5, &n, A, B+i*ldB, &N_ONE); + } + + // lower triangular transpose solve + inline void ltrhs(const unsigned n, double* A, + const unsigned p, double* B, const unsigned ldB) + { + // dtptrs_(JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, &p, A, B, &ldB, &inf); + for (unsigned i=0; i<p; ++i) + dtpsv_(JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, A, B+i*ldB, &N_ONE); + } + inline void ltrhs(const unsigned n, float* A, + const unsigned p, float* B, const unsigned ldB) + { + // stptrs_(JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, &p, A, B, &ldB, &inf); + for (unsigned i=0; i<p; ++i) + stpsv_(JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, A, B+i*ldB, &N_ONE); + } + + // unit upper triangular solve (with L and R stored in one matrix) + // XR=B, R is pxp, B is nxp + inline void utrcs(const unsigned p, const double* LR, const unsigned ldLR, + const unsigned n, double* X, const unsigned ldX) + { + dtrsm_(JOB_STR+8, JOB_STR+5, JOB_STR, JOB_STR+5, &n, &p, &D_ONE, + LR, &ldLR, X, &ldX); + } + + inline void utrcs(const unsigned p, const float* LR, const unsigned ldLR, + const unsigned n, float* X, const unsigned ldX) + { + strsm_(JOB_STR+8, JOB_STR+5, JOB_STR, JOB_STR+5, &n, &p, &S_ONE, + LR, &ldLR, X, &ldX); + } + + // unit upper triangular solve (with L and R stored in one matrix) + // RX=B, R is nxn, B is nxp + inline void utlcs(const unsigned n, float* LR, const unsigned ldLR, + const unsigned p, float* X, const unsigned ldX) + { + strsm_(JOB_STR+6, JOB_STR+5, JOB_STR, JOB_STR+5, &n, &p, &S_ONE, + LR, &ldLR, X, &ldX); + } + inline void utlcs(const unsigned n, double* LR, const unsigned ldLR, + const unsigned p, double* X, const unsigned ldX) + { + dtrsm_(JOB_STR+6, JOB_STR+5, JOB_STR, JOB_STR+5, &n, &p, &D_ONE, + LR, &ldLR, X, &ldX); + } + + // unit lower triangular solve (with L and R stored in one matrix) + // XL=B, L is pxp, B is nxp + inline void ltrcs(const unsigned p, float* LR, const unsigned ldLR, + const unsigned n, float* X, const unsigned ldX) + { + strsm_(JOB_STR+8, JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, &S_ONE, + LR, &ldLR, X, &ldX); + } + inline void ltrcs(const unsigned p, double* LR, const unsigned ldLR, + const unsigned n, double* X, const unsigned ldX) + { + dtrsm_(JOB_STR+8, JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, &D_ONE, + LR, &ldLR, X, &ldX); + } + + + // unit lower triangular transposed solve (with L and R stored in one matrix) + // XL^T=B, L is pxp, B is nxp + inline void ltrhcs(const unsigned p, double* LR, const unsigned ldLR, + const unsigned n, double* X, const unsigned ldX) + { + dtrsm_(JOB_STR+8, JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, &p, &D_ONE, + LR, &ldLR, X, &ldX); + } + + // unit lower triangular solve (with L and R stored in one matrix) + // LX=B, L is nxn, B is nxp + inline void ltlcs(const unsigned n, float* LR, const unsigned ldLR, + const unsigned p, float* X, const unsigned ldX) + { + strsm_(JOB_STR+6, JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, &S_ONE, + LR, &ldLR, X, &ldX); + } + inline void ltlcs(const unsigned n, double* LR, const unsigned ldLR, + const unsigned p, double* X, const unsigned ldX) + { + dtrsm_(JOB_STR+6, JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, &D_ONE, + LR, &ldLR, X, &ldX); + } + + // upper triangular solve + inline void utrs(const unsigned n, double* A, + const unsigned p, double* B, const unsigned ldB) + { + // dtptrs_(JOB_STR+5, JOB_STR, JOB_STR, &n, &p, A, B, &ldB, &inf); + for (unsigned i=0; i<p; ++i) + dtpsv_(JOB_STR+5, JOB_STR, JOB_STR, &n, A, B+i*ldB, &N_ONE); + } + inline void utrs(const unsigned n, float* A, + const unsigned p, float* B, const unsigned ldB) + { + //stptrs_(JOB_STR+5, JOB_STR, JOB_STR, &n, &p, A, B, &ldB, &inf); + for (unsigned i=0; i<p; ++i) + stpsv_(JOB_STR+5, JOB_STR, JOB_STR, &n, A, B+i*ldB, &N_ONE); + } + + // upper triangluar transpose solve + inline void utrhs(const unsigned n, double* A, + const unsigned p, double* B, const unsigned ldB) + { + //dtptrs_(JOB_STR+5, JOB_STR+1, JOB_STR, &n, &p, A, B, &ldB, &inf); + for (unsigned i=0; i<p; ++i) + dtpsv_(JOB_STR+5, JOB_STR+1, JOB_STR, &n, A, B+i*ldB, &N_ONE); + } + inline void utrhs(const unsigned n, float* A, + const unsigned p, float* B, const unsigned ldB) + { + //stptrs_(JOB_STR+5, JOB_STR+1, JOB_STR, &n, &p, A, B, &ldB, &inf); + for (unsigned i=0; i<p; ++i) + stpsv_(JOB_STR+5, JOB_STR+1, JOB_STR, &n, A, B+i*ldB, &N_ONE); + } +} + +#endif diff --git a/MathLib/LinAlg/Solvers/solver.h b/MathLib/LinAlg/Solvers/solver.h new file mode 100644 index 00000000000..79772ef8758 --- /dev/null +++ b/MathLib/LinAlg/Solvers/solver.h @@ -0,0 +1,18 @@ +/* + * solver.h + * + * Created on: Sep 27, 2011 + * Author: fischeth + */ + +#ifndef SOLVER_H_ +#define SOLVER_H_ + +namespace MathLib { + +unsigned CG(CRSMatrix<double> const * mat, double const * const b, + double* const x, double& eps, unsigned& nsteps, unsigned num_threads = 1); + +} // end namespace MathLib + +#endif /* SOLVER_H_ */ diff --git a/MathLib/CRSMatrix.h b/MathLib/LinAlg/Sparse/CRSMatrix.h similarity index 85% rename from MathLib/CRSMatrix.h rename to MathLib/LinAlg/Sparse/CRSMatrix.h index f040e654276..977cc91b2bf 100644 --- a/MathLib/CRSMatrix.h +++ b/MathLib/LinAlg/Sparse/CRSMatrix.h @@ -1,3 +1,10 @@ +/* + * CRSMatrix.h + * + * Created on: Sep 20, 2011 + * Author: TF + */ + #ifndef CRSMATRIX_H #define CRSMATRIX_H @@ -8,6 +15,8 @@ #include "sparse.h" #include "amuxCRS.h" +namespace MathLib { + template<class T> class CRSMatrix : public SparseMatrixBase<T> { public: @@ -42,7 +51,7 @@ public: virtual void amux(T d, T const * const x, T *y) const { - amuxCRS(d, SparseMatrixBase<T>::_n_rows, _row_ptr, _col_idx, _data, x, y); + amuxCRS(d, MatrixBase::_n_rows, _row_ptr, _col_idx, _data, x, y); } protected: @@ -51,5 +60,7 @@ protected: T* _data; }; +} // end namespace MathLib + #endif diff --git a/MathLib/CRSMatrixDiagPrecond.h b/MathLib/LinAlg/Sparse/CRSMatrixDiagPrecond.h similarity index 100% rename from MathLib/CRSMatrixDiagPrecond.h rename to MathLib/LinAlg/Sparse/CRSMatrixDiagPrecond.h diff --git a/MathLib/CRSMatrixOpenMP.h b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h similarity index 80% rename from MathLib/CRSMatrixOpenMP.h rename to MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h index 009be74ee29..408304ae8b6 100644 --- a/MathLib/CRSMatrixOpenMP.h +++ b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h @@ -13,6 +13,8 @@ #include "CRSMatrix.h" #include "amuxCRS.h" +namespace MathLib { + template<class T> class CRSMatrixOpenMP : public CRSMatrix<T> { public: CRSMatrixOpenMP(std::string const &fname, unsigned num_of_threads) : @@ -32,11 +34,13 @@ public: virtual void amux(T d, T const * const x, T *y) const { - amuxCRSParallelOpenMP(d, SparseMatrixBase<T>::_n_rows, CRSMatrix<T>::_row_ptr, CRSMatrix<T>::_col_idx, CRSMatrix<T>::_data, x, y, _num_of_threads); + amuxCRSParallelOpenMP(d, MatrixBase::_n_rows, CRSMatrix<T>::_row_ptr, CRSMatrix<T>::_col_idx, CRSMatrix<T>::_data, x, y, _num_of_threads); } private: unsigned _num_of_threads; }; +} // end namespace MathLib + #endif /* CRSMATRIXOPENMP_H_ */ diff --git a/MathLib/CRSMatrixPThreads.h b/MathLib/LinAlg/Sparse/CRSMatrixPThreads.h similarity index 95% rename from MathLib/CRSMatrixPThreads.h rename to MathLib/LinAlg/Sparse/CRSMatrixPThreads.h index 7c56d9b8948..eeabaac7617 100644 --- a/MathLib/CRSMatrixPThreads.h +++ b/MathLib/LinAlg/Sparse/CRSMatrixPThreads.h @@ -15,6 +15,8 @@ #include "CRSMatrix.h" #include "amuxCRS.h" +namespace MathLib { + template<class T> class CRSMatrixPThreads : public CRSMatrix<T> { public: @@ -42,5 +44,7 @@ protected: unsigned _num_of_threads; }; +} // end namespace MathLib + #endif diff --git a/MathLib/CRSSymMatrix.h b/MathLib/LinAlg/Sparse/CRSSymMatrix.h similarity index 100% rename from MathLib/CRSSymMatrix.h rename to MathLib/LinAlg/Sparse/CRSSymMatrix.h diff --git a/MathLib/LinAlg/Sparse/CRSTranspose.h b/MathLib/LinAlg/Sparse/CRSTranspose.h new file mode 100644 index 00000000000..6b3eba98604 --- /dev/null +++ b/MathLib/LinAlg/Sparse/CRSTranspose.h @@ -0,0 +1,44 @@ +/* + * CRSTranspose.h + * + * Created on: Sep 27, 2011 + * Author: fischeth + */ + +#ifndef CRSTRANSPOSE_H_ +#define CRSTRANSPOSE_H_ + +void CS_transp(unsigned n, double* A, unsigned* jA, unsigned *iA, + double* B, unsigned *jB, unsigned *iB) +{ + unsigned nnz = iA[n]; + unsigned *inz = new unsigned[n]; + + for (unsigned i=0; i<n; ++i) inz[i] = 0; + + // compute number of entries of each column in A + for (unsigned l=0; l<nnz; ++l) inz[jA[l]]++; + + // create iB + iB[0] = nnz = 0; + for (unsigned i=0; i<n; ++i) { + nnz += inz[i]; + inz[i] = 0; + iB[i+1] = nnz; + } + + // create arrays jB, B + unsigned l = iA[0]; + for (unsigned i=0; i<n; ++i) { + while (l<iA[i+1]) { + unsigned j = jA[l]; + unsigned k = iB[j] + inz[j]++; + B[k] = A[l++]; + jB[k] = i; + } + } + + delete [] inz; +} + +#endif /* CRSTRANSPOSE_H_ */ diff --git a/MathLib/LinAlg/Sparse/SparseMatrixBase.h b/MathLib/LinAlg/Sparse/SparseMatrixBase.h new file mode 100644 index 00000000000..d6c65de2747 --- /dev/null +++ b/MathLib/LinAlg/Sparse/SparseMatrixBase.h @@ -0,0 +1,19 @@ +#ifndef SPARSEMATRIXBASE_H +#define SPARSEMATRIXBASE_H + +#include "../MatrixBase.h" + +namespace MathLib { + +template<class T> class SparseMatrixBase : public MatrixBase +{ +public: + SparseMatrixBase(unsigned n1, unsigned n2) : MatrixBase (n1,n2) {} + SparseMatrixBase() : MatrixBase () {} + virtual void amux(T d, T const * const x, T *y) const = 0; // y +=d*Ax + virtual ~SparseMatrixBase() { } +}; + +} // end namespace MathLib + +#endif diff --git a/MathLib/amuxCRS.cpp b/MathLib/LinAlg/Sparse/amuxCRS.cpp similarity index 95% rename from MathLib/amuxCRS.cpp rename to MathLib/LinAlg/Sparse/amuxCRS.cpp index 336527d0bd6..22e9aef84bd 100644 --- a/MathLib/amuxCRS.cpp +++ b/MathLib/LinAlg/Sparse/amuxCRS.cpp @@ -1,9 +1,16 @@ +/* + * CRSMatrix.h + * + * Created on: Sep 20, 2011 + * Author: TF + */ + #include "amuxCRS.h" #include <omp.h> #include <omp.h> #include <pthread.h> -void amuxCRS (double a, +void amuxCRS (double a, unsigned n, unsigned const * const iA, unsigned const * const jA, double const * const A, double const * const x, double* y) { @@ -18,7 +25,7 @@ void amuxCRS (double a, } struct MatMultThreadParam { - MatMultThreadParam (double scalar_factor, unsigned beg_row, unsigned end_row, + MatMultThreadParam (double scalar_factor, unsigned beg_row, unsigned end_row, unsigned const * const iA, unsigned const * const jA, double const * const A, double const * const x, double* y) : _a (scalar_factor), _beg_row(beg_row), _end_row(end_row), @@ -60,7 +67,7 @@ void* amuxCRSpthread (void* ptr) } } // end extern "C" -void amuxCRSParallelPThreads (double a, +void amuxCRSParallelPThreads (double a, unsigned n, unsigned const * const iA, unsigned const * const jA, double const * const A, double const * const x, double* y, unsigned num_of_pthreads) @@ -99,7 +106,7 @@ void amuxCRSParallelPThreads (double a, #endif } -void amuxCRSParallelOpenMP (double a, +void amuxCRSParallelOpenMP (double a, unsigned n, unsigned const * const iA, unsigned const * const jA, double const * const A, double const * const x, double* y, unsigned num_of_omp_threads) diff --git a/MathLib/amuxCRS.h b/MathLib/LinAlg/Sparse/amuxCRS.h similarity index 100% rename from MathLib/amuxCRS.h rename to MathLib/LinAlg/Sparse/amuxCRS.h diff --git a/MathLib/MathTools.cpp b/MathLib/MathTools.cpp new file mode 100644 index 00000000000..b332447976c --- /dev/null +++ b/MathLib/MathTools.cpp @@ -0,0 +1,73 @@ +/* + * MathTools.cpp + * + * Created on: Jan 13, 2010 + * Author: TF + */ + +#include "MathTools.h" + +namespace MathLib { + +void crossProd(const double u[3], const double v[3], double r[3]) +{ + r[0] = u[1] * v[2] - u[2] * v[1]; + r[1] = u[2] * v[0] - u[0] * v[2]; + r[2] = u[0] * v[1] - u[1] * v[0]; +} + +double calcProjPntToLineAndDists(const double p[3], const double a[3], + const double b[3], double &lambda, double &d0) +{ + // g (lambda) = a + lambda v, v = b-a + double v[3] = {b[0] - a[0], b[1] - a[1], b[2] - a[2]}; + // orthogonal projection: (g(lambda)-p) * v = 0 => in order to compute lambda we define a help vector u + double u[3] = {p[0] - a[0], p[1] - a[1], p[2] - a[2]}; + lambda = scpr (u, v, 3) / scpr (v, v, 3); + + // compute projected point + double proj_pnt[3]; + for (size_t k(0); k<3; k++) proj_pnt[k] = a[k] + lambda * v[k]; + + d0 = sqrt (sqrDist (proj_pnt, a)); + + return sqrt (sqrDist (p, proj_pnt)); +} + +double sqrNrm2 (const GEOLIB::Point* p0) +{ + return scpr (p0->getData(), p0->getData(), 3); +} + +double sqrDist (const GEOLIB::Point* p0, const GEOLIB::Point* p1) +{ + const double v[3] = {(*p1)[0] - (*p0)[0], (*p1)[1] - (*p0)[1], (*p1)[2] - (*p0)[2]}; + return scpr (v, v, 3); +} + +double sqrDist(const double* p0, const double* p1) +{ + const double v[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; + return scpr (v, v, 3); +} + +bool checkDistance(GEOLIB::Point const &p0, GEOLIB::Point const &p1, double squaredDistance) +{ + return (sqrDist(&p0, &p1) < squaredDistance); +} + +float normalize(float min, float max, float val) +{ + return ((val-min)/static_cast<float>(max-min)); +} + +double getAngle (const double p0[3], const double p1[3], const double p2[3]) +{ + const double v0[3] = {p0[0]-p1[0], p0[1]-p1[1], p0[2]-p1[2]}; + const double v1[3] = {p2[0]-p1[0], p2[1]-p1[1], p2[2]-p1[2]}; + + // apply Cauchy Schwarz inequality + return acos (scpr (v0,v1,3) / (sqrt(scpr(v0,v0,3)) * sqrt(scpr (v1,v1,3)))); +} + +} // namespace diff --git a/MathLib/MathTools.h b/MathLib/MathTools.h new file mode 100644 index 00000000000..71746cbafd1 --- /dev/null +++ b/MathLib/MathTools.h @@ -0,0 +1,105 @@ +/* + * \file MathTools.h + * + * Created on: Jan 13, 2010 + * Author: fischeth + */ + +#ifndef MATHTOOLS_H_ +#define MATHTOOLS_H_ + +#include <vector> +#include <cmath> +#include <limits> +#include "Point.h" + +namespace MathLib { + +/** + * standard inner product in R^3 + * \param v0 array of type T representing the vector + * \param v1 array of type T representing the vector + * \param n the size of the array + * */ +template<class T> inline +double scpr(const T* v0, const T* v1, size_t n) { + long double res(0.0); + for (size_t k(0); k<n; k++) + res += v0[k] * v1[k]; + return (double) res; +} + +/** + * computes the cross (or vector) product of the 3d vectors u and v + * the result is given in the vector r + */ +void crossProd (const double u[3], const double v[3], double r[3]); + +/** + * calcProjPntToLineAndDists computes the orthogonal projection + * of a point p to the line described by the points a and b, + * $g(\lambda) = a + \lambda (b - a)$, + * the distance between p and the projected point + * and the distances between the projected point and the end + * points a, b of the line + * \param p the (mesh) point + * \param a first point of line + * \param b second point of line + * \param lambda the projected point described by the line equation above + * \param d0 distance to the line point a + * \returns the distance between p and the orthogonal projection of p + */ +double calcProjPntToLineAndDists(const double p[3], const double a[3], + const double b[3], double &lambda, double &d0); + +/** + * Checks if two points are within a given distance of each other + * @param p0 The first point + * @param p1 the second point + * @param squaredDistance The square of the distance within which the two points should be + * @return true if p1 and p2 are within the given distance of each other, false otherwise + */ +bool checkDistance(GEOLIB::Point const &p0, GEOLIB::Point const &p1, double squaredDistance); + +/** squared euklid norm of the vector p0 */ +double sqrNrm2(const GEOLIB::Point* const p0); + +/** squared dist between GEOLIB::Points p0 and p1 */ +double sqrDist(const GEOLIB::Point* p0, const GEOLIB::Point* p1); + +/** squared dist between double arrays p0 and p1 (size of arrays is 3) */ +double sqrDist(const double* p0, const double* p1); + +/** linear normalisation of val from [min, max] into [0,1] */ +float normalize(float min, float max, float val); + +/** + * Let \f$p_0, p_1, p_2 \in \mathbb{R}^3\f$. The function getAngle + * computes the angle between the edges \f$(p_0,p_1)\f$ and \f$(p_1,p_2)\f$ + * @param p0 start point of edge 0 + * @param p1 end point of edge 0 and start point of edge 1 + * @param p2 end point of edge 1 + * @return the angle between the edges + */ +double getAngle (const double p0[3], const double p1[3], const double p2[3]); + +/** + * simple power function that takes as a second argument an integer instead of a float + * @param base basis of the expression + * @param exp exponent of the expression + * @return base^exp + */ +template <typename T> inline +T fastpow (T base, size_t exp) +{ + T result (base); + if (exp == 0) result = static_cast<T>(1); + for (size_t k(1); k<exp; k++) { + result *= base; + } + return result; +} + +} // namespace + +#endif /* MATHTOOLS_H_ */ diff --git a/MathLib/SparseMatrixBase.h b/MathLib/SparseMatrixBase.h deleted file mode 100644 index 831818a1b13..00000000000 --- a/MathLib/SparseMatrixBase.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef SPARSEMATRIXBASE_H -#define SPARSEMATRIXBASE_H - -template<class T> class SparseMatrixBase -{ -public: - SparseMatrixBase(unsigned n1, unsigned n2) : _n_rows(n1), _n_cols(n2) { } - SparseMatrixBase() : _n_rows(0), _n_cols(0) { } - virtual void amux(T d, T const * const x, T *y) const = 0; // y +=d*Ax - virtual ~SparseMatrixBase() { } - unsigned getNRows () const { return _n_rows; } - unsigned getNCols () const { return _n_cols; } - -protected: - unsigned _n_rows; - unsigned _n_cols; -}; - - -#endif - diff --git a/MathLib/sparse.h b/MathLib/sparse.h index 44ecf09e79b..fa3f6623659 100644 --- a/MathLib/sparse.h +++ b/MathLib/sparse.h @@ -52,37 +52,5 @@ CS_read(std::istream &is, unsigned &n, unsigned* &iA, unsigned* &jA, T* &A) #endif } -void CS_transp(unsigned n, double* A, unsigned* jA, unsigned *iA, - double* B, unsigned *jB, unsigned *iB) -{ - unsigned nnz = iA[n]; - unsigned *inz = new unsigned[n]; - - for (unsigned i=0; i<n; ++i) inz[i] = 0; - - // compute number of entries of each column in A - for (unsigned l=0; l<nnz; ++l) inz[jA[l]]++; - - // create iB - iB[0] = nnz = 0; - for (unsigned i=0; i<n; ++i) { - nnz += inz[i]; - inz[i] = 0; - iB[i+1] = nnz; - } - - // create arrays jB, B - unsigned l = iA[0]; - for (unsigned i=0; i<n; ++i) { - while (l<iA[i+1]) { - unsigned j = jA[l]; - unsigned k = iB[j] + inz[j]++; - B[k] = A[l++]; - jB[k] = i; - } - } - - delete [] inz; -} - #endif + diff --git a/SimpleTests/MatrixTests/.MatMult.cpp.swp b/SimpleTests/MatrixTests/.MatMult.cpp.swp deleted file mode 100644 index 808689bb3f0aefe083b5aa63d6b6f520e7d2869f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2OKclO7{{lSM@=cE<;vm1<*;@fJ4r)ZIS(iS@oGraB)x>lTCd0PB<oqTkHkr7 zD;4n+NC<%h^h!@0-~uNOJc?QgE(js$k*YV;9ykCOB>ppNuU~P<0U?1l>MyqE`OW`( zjH7OG=+u+bbTm7}@N*AiFJ9kx=I9FtcJE6wRu-OB;eJJRZTC~+Ya>}9eeTL^P~Oaz z_|~J(HW$pTJMOAiGcBH*d3<j0NX~Nt*WzAoPE>0)pXZ+M<)%&F74_)-bYS~gt5(~3 z>9{pkAXZ>I1p>c3cx)d#JTk0P{ZR6Ly7%D1b~EC>Sb<o9Sb<o9Sb<o9Sb<o9Sb<o9 z|Cs`QZ5KO-Gu_fSm*05Z)%Du=7{9~{#0ta;#0ta;#0ta;#0ta;#0ta;#0ta;#0vZe z6);PTE#r^>xm#g8{{O%E_y3Lkj9mp+z^C9Mcnh2b%it+601klP?`G^b@B{cBd<ka2 z6nGds1Rel;!OeY)-2k6~i{LfzDp&?(Fbie?fva~hb{TvC-UF|IJU9Xf><7Q@W$YUG z4!j9o2j{^UI0lm7Ah>)dW0$~J;B)XksDc$Bz$0J}_%^}VH{fIN4tN^`U>TIbDR6xc zV?To{;3IGWybOvU4Gw~vcQEz~_z8RsE`cw=1+V}{!L{3QR`4`<0z3vD1*70T@X2kA zeF)wK7MKBl!cTvIYv4!l5;zaefwQ0niogV?z&udCy|Ekif;YelD1i9qUr^u-OSs$* zT*;z$G&IIoqQq_Pb6Q-Yjj_h3oeb$`^53t{X$R{ogZgc;)ApU1f@9Z}3K*+Ncz!9L zx17MI@o}<hLBSVQ?q<1d);wM^QmQ%mte;e`ZXmbn)=O|%vY}_<3~}4zbQ*8r01r<O z6&x>X*^bAJlo}9j+v@v=_DzT~8PSHR&;V_jQ}%q9n^lT>$uQmJwZ+k;4CxWV@$z}I zR@3&Wb<l0m!~{L4*NKj_jY8ycOUXw{KCHXAonT2l8fu24_0SFx^@pZYk|jqXJd#Ww zagDo0XsHZi>0CcYCf7_m;It8%v8tW8L=&X9+E`p&(k&z%pZIfuX_N1e45~%$l2axw z@dz0ANJw&>^>9+SMuz-~>67rnMsa*`bzpdDP`&CUJ5HgL)D2~`=oxy~K^jRL9kWs? znhd>w{kbcwf-S1T_r^<3P_+4EK3^5mz$ipi*>TATq$idoFVU(olp3Seafx381A0KG zZk~RsjeI=TrcPDW54O=7w|uNqMPyPon+*>VnrR1K1tFTtg>%Aq*?KC=rIAIggz$W) zrsKPnLQ&HhmdhnN6D{ei_Ie!#X3c6)Cu2A}l%aZtHV~1$bG7Mudgzos$IxndVv`sA zmR1Ed*=s4*Zm#M2`Uvf(YPL<yF3cmc6>h6cK&+F2Y;{C4(V;3#r$2ffAZoU-OkX&X z)`ef8d8g`;l3?oO92xqEs5lj5Z^;WIw(%j==gcbP+A}1PsH@1FASeo(`UcRzz-mg} z4Las1y<tecPT{9Txuohsv#Z|Ir91?C!{;Vv=R<3o;j5yK44ENuJVPr0|Dq#>-i_wY z)VQ3U-RaEMp8aQF16?HgoLg=-0zs$m1dV8s8sVR(vaoG3fs~t3P0)TC4#U6K#j~bs zR&l+$9;&Tb%9ET3y{Ns1qsz@k1C~TN)dAADI=Xz9h!P!>RLuze(7PvhUF5UcPRQkE zmX}?p8m4wx!lHt5L)U&$S>Ng=M1p3*`Kq9I--qO-degVt{zN!4Dg%kBxq`CN5D16% zh*04b4Q?h}XJYB4)G&4YuC&_O$&-CDru$OSpM})rH1a@N95<d>IB_DCq4Y}hR<@-@ zqY*9IrJ{M~qIO3P$XW&5#0C}#$EaHuSIqNmAB#eouBv*f2&dEOZnt8%h1fzoD~D<) zBT6|^x9B)FYP-H1$?)BPtKbi<!LBMaB10PY|A<O#mXKjhM3FFkM>P5l>!gnaXpT`= zj<!vSj=PYz-I}VhwiPtosJzatz*p8qDc`xvaQ80FT4xNm@6)=O8~s>O1p==N6c;_T zD<&2^bD8JE0M_4ihMG5iP%Sv+sHQ?iJnY>#gzD%#gvw&|6;5f~6YwhD^%tda8dc$P zn&DY=<R4tF-l(n&SryZzv^HT0750#jmK~HhCEG8QY$e&&(|)6L_C%GkS!MflhlR9H zXJ<2>hIc*Q(Kc=&JZ({&Y}qvKRBPEvdqemorM<<1mK{Uztx|vQY#-D}i%Iqul?60K diff --git a/SimpleTests/MatrixTests/MatMult.cpp b/SimpleTests/MatrixTests/MatMult.cpp index 182e119d6de..d68b3b60ba2 100644 --- a/SimpleTests/MatrixTests/MatMult.cpp +++ b/SimpleTests/MatrixTests/MatMult.cpp @@ -5,9 +5,9 @@ #include <omp.h> #include <cstdlib> #include "sparse.h" -#include "CRSMatrix.h" -#include "CRSMatrixOpenMP.h" -#include "CRSMatrixPThreads.h" +#include "LinAlg/Sparse/CRSMatrix.h" +#include "LinAlg/Sparse/CRSMatrixOpenMP.h" +#include "LinAlg/Sparse/CRSMatrixPThreads.h" #include "RunTimeTimer.h" #include "CPUTimeTimer.h" @@ -58,7 +58,8 @@ int main(int argc, char *argv[]) for (unsigned k(0); k<nnz; k++) A[k] = 2.0; */ - CRSMatrix<double> mat (n, iA, jA, A); + MathLib::CRSMatrix<double> mat (n, iA, jA, A); + std::cout << mat.getNRows() << " x " << mat.getNCols() << std::endl; // CRSMatrixOpenMP<double> mat (n, iA, jA, A, n_threads); // CRSMatrixPThreads<double> mat (n, iA, jA, A, n_threads); diff --git a/SimpleTests/SolverTests/CMakeLists.txt b/SimpleTests/SolverTests/CMakeLists.txt new file mode 100644 index 00000000000..67123af7846 --- /dev/null +++ b/SimpleTests/SolverTests/CMakeLists.txt @@ -0,0 +1,44 @@ +## pthread ## +SET ( CMAKE_THREAD_PREFER_PTHREAD On ) +FIND_PACKAGE( Threads ) + +# Find blas +FIND_PACKAGE ( BLAS REQUIRED ) +# Find lapack +FIND_PACKAGE ( LAPACK REQUIRED ) + +IF ( CMAKE_USE_PTHREADS_INIT ) + SET (HAVE_PTHREADS TRUE) + MESSAGE (STATUS "pthread library found." ) +ENDIF (CMAKE_USE_PTHREADS_INIT ) + +INCLUDE_DIRECTORIES( + . + ../../Base/ + ../../MathLib/ +) + +# Create the executable +ADD_EXECUTABLE( ConjugateGradientUnpreconditioned + ConjugateGradientUnpreconditioned.cpp + ${SOURCES} + ${HEADERS} +) + +ADD_EXECUTABLE( ConjugateGradientDiagPrecond + ConjugateGradientDiagonalPreconditioned.cpp + ../Preconditioner/generateDiagPrecond.cpp + ${SOURCES} + ${HEADERS} +) + +TARGET_LINK_LIBRARIES ( ConjugateGradientUnpreconditioned + ${BLAS_LIBRARIES} + ${LAPACK_LIBRARIES} +) + +TARGET_LINK_LIBRARIES ( ConjugateGradientDiagPrecond + ${BLAS_LIBRARIES} + ${LAPACK_LIBRARIES} +) + diff --git a/SimpleTests/SolverTests/ConjugateGradientDiagonalPreconditioned.cpp b/SimpleTests/SolverTests/ConjugateGradientDiagonalPreconditioned.cpp new file mode 100644 index 00000000000..263f9ffb572 --- /dev/null +++ b/SimpleTests/SolverTests/ConjugateGradientDiagonalPreconditioned.cpp @@ -0,0 +1,75 @@ +#include <fstream> +#include <iostream> +#include <cmath> +#include <omp.h> +#include "CG.h" +#include "CRSMatrixDiagPrecond.h" +#include "sparse.h" +#include "vector_io.h" +#include "timeMeasurement.h" + +int main(int argc, char *argv[]) +{ + if (argc != 4) { + std::cout << "Usage: " << argv[0] << " matrix rhs number-of-threads" << std::endl; + return -1; + } + + // read number of threads + unsigned num_omp_threads (1); + num_omp_threads = atoi (argv[3]); + + // *** reading matrix in crs format from file + std::string fname(argv[1]); + CRSMatrixDiagPrecond *mat (new CRSMatrixDiagPrecond(fname, num_omp_threads)); + + unsigned n (mat->getNRows()); + bool verbose (true); + if (verbose) + std::cout << "Parameters read: n=" << n << std::endl; + + double *x(new double[n]); + double *b(new double[n]); + + // *** init start vector x + for (size_t k(0); k<n; k++) { + x[k] = 0.0; + } + // *** read rhs + fname = argv[2]; + std::ifstream in(fname.c_str()); + if (in) { + read (in, n, b); + in.close(); + } else { + std::cout << "problem reading rhs - initializing b with 1.0" << std::endl; + for (size_t k(0); k<n; k++) { + b[k] = 1.0; + } + } + + + if (verbose) + std::cout << "solving system with PCG method (diagonal preconditioner) ... " << std::flush; + time_t start_time, end_time; + time(&start_time); + double cg_time (cputime(0.0)); + double eps (1.0e-6); + unsigned steps (4000); + CG (mat, b, x, eps, steps, num_omp_threads); + cg_time = cputime(cg_time); + time(&end_time); + if (verbose) { + std::cout << " in " << steps << " iterations" << std::endl; + std::cout << "\t(residuum is " << eps << ") took " << cg_time << " sec time and " << (end_time-start_time) << " sec" << std::endl; + } else { + std::cout << end_time-start_time << std::endl; + } + + delete mat; + delete [] x; + delete [] b; + + return 0; +} + diff --git a/SimpleTests/SolverTests/ConjugateGradientUnpreconditioned.cpp b/SimpleTests/SolverTests/ConjugateGradientUnpreconditioned.cpp new file mode 100644 index 00000000000..d24ccac97ec --- /dev/null +++ b/SimpleTests/SolverTests/ConjugateGradientUnpreconditioned.cpp @@ -0,0 +1,66 @@ +#include <fstream> +#include <iostream> +#include <omp.h> +#include "CG.h" +#include "CRSMatrix.h" +#include "sparse.h" +#include "vector_io.h" +#include "timeMeasurement.h" + +int main(int argc, char *argv[]) +{ + // *** reading matrix in crs format from file + std::string fname("/work/fischeth/data/testmat.bin"); +// std::ifstream in(fname.c_str(), std::ios::binary); + CRSMatrix<double> *mat (new CRSMatrix<double>(fname, 1)); +/* double *A(NULL); + unsigned *iA(NULL), *jA(NULL), n; + if (in) { + std::cout << "reading matrix from " << fname << " ... " << std::flush; + CS_read(in, n, iA, jA, A); + in.close(); + std::cout << " ok" << std::endl; + } + unsigned nnz(iA[n]); +*/ + unsigned n (mat->getNRows()); + std::cout << "Parameters read: n=" << n << std::endl; + + double *x(new double[n]); + double *b(new double[n]); + + // *** init start vector x + for (size_t k(0); k<n; k++) { + x[k] = 1.0; + } + // *** read rhs + fname = "/work/fischeth/data/rhs.dat"; + std::ifstream in(fname.c_str()); + if (in) { + read (in, n, b); + in.close(); + } else { + std::cout << "problem reading rhs - initializing b with 1.0" << std::endl; + for (size_t k(0); k<n; k++) { + b[k] = 1.0; + } + } + + std::cout << "solving system with CG method ... " << std::flush; + time_t start_time, end_time; + time(&start_time); + double cg_time (cputime(0.0)); + double eps (1.0e-6); + unsigned steps (4000); + CG (mat, b, x, eps, steps, 1); + cg_time = cputime(cg_time); + time(&end_time); + std::cout << " in " << steps << " iterations (residuum is " << eps << ") took " << cg_time << " sec time and " << (end_time-start_time) << " sec" << std::endl; + + delete mat; + delete [] x; + delete [] b; + + return 0; +} + -- GitLab