From f8560f56814541062aee56bd1988a5c3eda7321b Mon Sep 17 00:00:00 2001 From: Dmitri Naumov <github@naumov.de> Date: Sat, 14 May 2016 17:52:11 +0000 Subject: [PATCH] [MaL] Tabs to whitespaces. --- MathLib/CMakeLists.txt | 28 +- MathLib/ConstantFunction.h | 18 +- .../LinearIntervalInterpolation.h | 64 +- .../PiecewiseLinearInterpolation.cpp | 40 +- .../PiecewiseLinearInterpolation.h | 66 +- MathLib/LinAlg/Dense/DenseMatrix-impl.h | 362 ++++----- MathLib/LinAlg/Dense/DenseMatrix.h | 64 +- MathLib/LinAlg/Eigen/EigenTools.cpp | 4 +- MathLib/LinAlg/Eigen/EigenTools.h | 16 +- MathLib/LinAlg/FinalizeMatrixAssembly.h | 2 +- MathLib/LinAlg/Lis/LisMatrix.h | 4 +- MathLib/LinAlg/Lis/LisOption.h | 26 +- MathLib/LinAlg/Lis/LisTools.cpp | 112 +-- MathLib/LinAlg/Lis/LisTools.h | 4 +- MathLib/LinAlg/Lis/LisVector.cpp | 44 +- MathLib/LinAlg/Lis/LisVector.h | 176 ++--- .../Preconditioner/generateDiagPrecond.cpp | 104 +-- .../Preconditioner/generateDiagPrecond.h | 6 +- MathLib/LinAlg/RowColumnIndices.h | 12 +- MathLib/LinAlg/Solvers/BiCGStab.cpp | 242 +++--- MathLib/LinAlg/Solvers/CG.cpp | 154 ++-- MathLib/LinAlg/Solvers/CG.h | 4 +- MathLib/LinAlg/Solvers/CGParallel.cpp | 208 ++--- MathLib/LinAlg/Solvers/GMRes.cpp | 264 +++---- MathLib/LinAlg/Solvers/GaussAlgorithm-impl.h | 104 +-- MathLib/LinAlg/Solvers/GaussAlgorithm.h | 110 +-- .../LinAlg/Solvers/IterativeLinearSolver.h | 4 +- MathLib/LinAlg/Solvers/TriangularSolve-impl.h | 58 +- MathLib/LinAlg/Solvers/blas.h | 466 +++++------ MathLib/LinAlg/Sparse/CRSMatrix.h | 724 +++++++++--------- MathLib/LinAlg/Sparse/CRSMatrixDiagPrecond.h | 100 +-- MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h | 38 +- MathLib/LinAlg/Sparse/CRSMatrixPThreads.h | 142 ++-- MathLib/LinAlg/Sparse/CRSSymMatrix.h | 76 +- MathLib/LinAlg/Sparse/CRSTools-impl.h | 80 +- MathLib/LinAlg/Sparse/CRSTools.h | 4 +- MathLib/LinAlg/Sparse/CRSTranspose.h | 2 +- .../LinAlg/Sparse/MatrixSparsityPattern.cpp | 10 +- MathLib/LinAlg/Sparse/MatrixSparsityPattern.h | 32 +- .../NestedDissectionPermutation/AdjMat.cpp | 330 ++++---- .../NestedDissectionPermutation/AdjMat.h | 64 +- .../CRSMatrixReordered.cpp | 86 +-- .../CRSMatrixReordered.h | 8 +- .../CRSMatrixReorderedOpenMP.cpp | 4 +- .../CRSMatrixReorderedOpenMP.h | 6 +- .../NestedDissectionPermutation/Cluster.cpp | 304 ++++---- .../NestedDissectionPermutation/Cluster.h | 88 +-- .../ClusterBase.cpp | 68 +- .../NestedDissectionPermutation/ClusterBase.h | 138 ++-- .../NestedDissectionPermutation/Separator.cpp | 6 +- .../NestedDissectionPermutation/Separator.h | 50 +- MathLib/LinAlg/Sparse/SparseMatrixBase.h | 66 +- MathLib/LinAlg/Sparse/amuxCRS.cpp | 238 +++--- MathLib/LinAlg/Sparse/amuxCRS.h | 52 +- MathLib/LinAlg/Sparse/sparse.h | 52 +- MathLib/LinearFunction.h | 40 +- MathLib/MathTools.cpp | 38 +- MathLib/MathTools.h | 44 +- MathLib/ODE/CVodeSolver.cpp | 400 +++++----- MathLib/ODE/CVodeSolver.h | 36 +- MathLib/ODE/ConcreteODESolver.h | 110 +-- MathLib/ODE/FunctionHandles.h | 120 +-- MathLib/ODE/ODESolver.h | 186 ++--- MathLib/ODE/ODESolverBuilder.h | 14 +- MathLib/Point3d.h | 6 +- MathLib/Point3dWithID.h | 66 +- MathLib/TemplatePoint.h | 212 ++--- MathLib/TemplateWeightedPoint.h | 16 +- MathLib/Vector3.h | 268 +++---- MathLib/vector_io.h | 104 +-- 70 files changed, 3597 insertions(+), 3597 deletions(-) diff --git a/MathLib/CMakeLists.txt b/MathLib/CMakeLists.txt index bf640b887ab..cc22a818570 100644 --- a/MathLib/CMakeLists.txt +++ b/MathLib/CMakeLists.txt @@ -27,35 +27,35 @@ GET_SOURCE_FILES(SOURCES_ODE ODE) set(SOURCES ${SOURCES} ${SOURCES_ODE}) if(OGS_USE_EIGEN) - GET_SOURCE_FILES(SOURCES_LINALG_EIGEN LinAlg/Eigen) - set(SOURCES ${SOURCES} ${SOURCES_LINALG_EIGEN}) + GET_SOURCE_FILES(SOURCES_LINALG_EIGEN LinAlg/Eigen) + set(SOURCES ${SOURCES} ${SOURCES_LINALG_EIGEN}) endif() if(OGS_USE_LIS) - GET_SOURCE_FILES(SOURCES_LINALG_LIS LinAlg/Lis) - set(SOURCES ${SOURCES} ${SOURCES_LINALG_LIS}) + GET_SOURCE_FILES(SOURCES_LINALG_LIS LinAlg/Lis) + set(SOURCES ${SOURCES} ${SOURCES_LINALG_LIS}) endif() if(OGS_USE_EIGEN AND OGS_USE_LIS) - GET_SOURCE_FILES(SOURCES_LINALG_EIGENLIS LinAlg/EigenLis) - set(SOURCES ${SOURCES} ${SOURCES_LINALG_EIGENLIS}) + GET_SOURCE_FILES(SOURCES_LINALG_EIGENLIS LinAlg/EigenLis) + set(SOURCES ${SOURCES} ${SOURCES_LINALG_EIGENLIS}) endif() if(OGS_USE_PETSC) - GET_SOURCE_FILES(SOURCES_LINALG_PETSC LinAlg/PETSc) - set(SOURCES ${SOURCES} ${SOURCES_LINALG_PETSC}) + GET_SOURCE_FILES(SOURCES_LINALG_PETSC LinAlg/PETSc) + set(SOURCES ${SOURCES} ${SOURCES_LINALG_PETSC}) endif() if(METIS_FOUND) - GET_SOURCE_FILES(SOURCES_LINALG_SPARSE_NESTEDDISSECTION LinAlg/Sparse/NestedDissectionPermutation) - set(SOURCES ${SOURCES} ${SOURCES_LINALG_SPARSE_NESTEDDISSECTION}) + GET_SOURCE_FILES(SOURCES_LINALG_SPARSE_NESTEDDISSECTION LinAlg/Sparse/NestedDissectionPermutation) + set(SOURCES ${SOURCES} ${SOURCES_LINALG_SPARSE_NESTEDDISSECTION}) endif () GET_SOURCE_FILES(SOURCES_NONLINEAR Nonlinear) set(SOURCES ${SOURCES} ${SOURCES_NONLINEAR}) if(METIS_FOUND) - include_directories(${METIS_INCLUDE_DIR}) + include_directories(${METIS_INCLUDE_DIR}) endif() # Create the library @@ -68,7 +68,7 @@ target_link_libraries(MathLib ) if (CVODE_FOUND) - target_link_libraries(MathLib ${CVODE_LIBRARIES}) + target_link_libraries(MathLib ${CVODE_LIBRARIES}) endif() if(METIS_FOUND) @@ -88,8 +88,8 @@ if (OGS_USE_PETSC) endif() if(TARGET Boost) - add_dependencies(MathLib Boost) + add_dependencies(MathLib Boost) endif() if(TARGET Eigen) - add_dependencies(MathLib Eigen) + add_dependencies(MathLib Eigen) endif() diff --git a/MathLib/ConstantFunction.h b/MathLib/ConstantFunction.h index a411a882fd8..4c99eaf9603 100644 --- a/MathLib/ConstantFunction.h +++ b/MathLib/ConstantFunction.h @@ -22,23 +22,23 @@ namespace MathLib /** * A constant function. * \f[ - * f(x_1,...,x_k)=a_0 + * f(x_1,...,x_k)=a_0 * \f] */ template <typename T_TYPE> class ConstantFunction { public: - explicit ConstantFunction(T_TYPE const& value) - : _value(value) - {} + explicit ConstantFunction(T_TYPE const& value) + : _value(value) + {} - T_TYPE operator()() const - { - return _value; - } + T_TYPE operator()() const + { + return _value; + } private: - T_TYPE const _value; + T_TYPE const _value; }; } diff --git a/MathLib/InterpolationAlgorithms/LinearIntervalInterpolation.h b/MathLib/InterpolationAlgorithms/LinearIntervalInterpolation.h index 342aeb1e744..08c94b4fa85 100644 --- a/MathLib/InterpolationAlgorithms/LinearIntervalInterpolation.h +++ b/MathLib/InterpolationAlgorithms/LinearIntervalInterpolation.h @@ -32,50 +32,50 @@ namespace MathLib { template <typename NUMERIC_TYPE> class LinearIntervalInterpolation { public: - /** - * Constructor of class template for a linear map \f$y = m \cdot x + n\f$. - * Under the prerequisite \f$a \neq b\f$ it initializes the coefficients - * \f$m\f$ and \f$n\f$ in a correct way. - * @param a first endpoint of the first interval - * @param b second endpoint of the first interval - * @param c first endpoint of the second interval - * @param d second endpoint of the second interval - */ - LinearIntervalInterpolation(NUMERIC_TYPE a, NUMERIC_TYPE b, NUMERIC_TYPE c, NUMERIC_TYPE d); - /** - * Method computes the value at point \f$x\f$ obtained by linear interpolation. - * @param x the point the interpolation value is searched for - * @return the interpolation value at point \f$x\f$ - */ - inline NUMERIC_TYPE operator() (NUMERIC_TYPE x) const; + /** + * Constructor of class template for a linear map \f$y = m \cdot x + n\f$. + * Under the prerequisite \f$a \neq b\f$ it initializes the coefficients + * \f$m\f$ and \f$n\f$ in a correct way. + * @param a first endpoint of the first interval + * @param b second endpoint of the first interval + * @param c first endpoint of the second interval + * @param d second endpoint of the second interval + */ + LinearIntervalInterpolation(NUMERIC_TYPE a, NUMERIC_TYPE b, NUMERIC_TYPE c, NUMERIC_TYPE d); + /** + * Method computes the value at point \f$x\f$ obtained by linear interpolation. + * @param x the point the interpolation value is searched for + * @return the interpolation value at point \f$x\f$ + */ + inline NUMERIC_TYPE operator() (NUMERIC_TYPE x) const; private: - /** - * the slope of the linear map - */ - NUMERIC_TYPE _m; - /** - * the offset of the linear map for \f$x\f$ equals zero - */ - NUMERIC_TYPE _n; + /** + * the slope of the linear map + */ + NUMERIC_TYPE _m; + /** + * the offset of the linear map for \f$x\f$ equals zero + */ + NUMERIC_TYPE _n; }; template <typename NUMERIC_TYPE> LinearIntervalInterpolation<NUMERIC_TYPE>::LinearIntervalInterpolation(NUMERIC_TYPE a, NUMERIC_TYPE b, - NUMERIC_TYPE c, NUMERIC_TYPE d) : - _m (d-c), _n(0.0) + NUMERIC_TYPE c, NUMERIC_TYPE d) : + _m (d-c), _n(0.0) { - if (b == a) { - throw std::runtime_error("LinearIntervalInterpolation::LinearIntervalInterpolation: a == b, empty interval"); - } - _m /= (b-a); - _n = c - _m * a; + if (b == a) { + throw std::runtime_error("LinearIntervalInterpolation::LinearIntervalInterpolation: a == b, empty interval"); + } + _m /= (b-a); + _n = c - _m * a; } template <typename NUMERIC_TYPE> inline NUMERIC_TYPE LinearIntervalInterpolation<NUMERIC_TYPE>::operator() (NUMERIC_TYPE x) const { - return _m * x + _n; + return _m * x + _n; } } // end namespace MathLib diff --git a/MathLib/InterpolationAlgorithms/PiecewiseLinearInterpolation.cpp b/MathLib/InterpolationAlgorithms/PiecewiseLinearInterpolation.cpp index 6444ca85d27..79c8c0cd939 100644 --- a/MathLib/InterpolationAlgorithms/PiecewiseLinearInterpolation.cpp +++ b/MathLib/InterpolationAlgorithms/PiecewiseLinearInterpolation.cpp @@ -28,31 +28,31 @@ PiecewiseLinearInterpolation::PiecewiseLinearInterpolation( : _supp_pnts(std::move(supporting_points)), _values_at_supp_pnts(std::move(values_at_supp_pnts)) { - if (!supp_pnts_sorted) { - BaseLib::quicksort<double, double>(_supp_pnts, static_cast<std::size_t> (0), - _supp_pnts.size(), _values_at_supp_pnts); - } + if (!supp_pnts_sorted) { + BaseLib::quicksort<double, double>(_supp_pnts, static_cast<std::size_t> (0), + _supp_pnts.size(), _values_at_supp_pnts); + } } double PiecewiseLinearInterpolation::getValue(double pnt_to_interpolate) const { - // search interval that has the point inside - std::size_t interval_idx(std::numeric_limits<std::size_t>::max()); - if (pnt_to_interpolate <= _supp_pnts.front()) { - interval_idx = 0; - } else { - if (_supp_pnts.back() <= pnt_to_interpolate) { - interval_idx = _supp_pnts.size() - 2; - } else { - auto const& it(std::lower_bound(_supp_pnts.begin(), _supp_pnts.end(), pnt_to_interpolate)); - interval_idx = std::distance(_supp_pnts.begin(), it) - 1; - } - } + // search interval that has the point inside + std::size_t interval_idx(std::numeric_limits<std::size_t>::max()); + if (pnt_to_interpolate <= _supp_pnts.front()) { + interval_idx = 0; + } else { + if (_supp_pnts.back() <= pnt_to_interpolate) { + interval_idx = _supp_pnts.size() - 2; + } else { + auto const& it(std::lower_bound(_supp_pnts.begin(), _supp_pnts.end(), pnt_to_interpolate)); + interval_idx = std::distance(_supp_pnts.begin(), it) - 1; + } + } - // compute linear interpolation polynom: y = m * (x - support[i]) + value[i] - const double m((_values_at_supp_pnts[interval_idx + 1] - _values_at_supp_pnts[interval_idx]) - / (_supp_pnts[interval_idx + 1] - _supp_pnts[interval_idx])); + // compute linear interpolation polynom: y = m * (x - support[i]) + value[i] + const double m((_values_at_supp_pnts[interval_idx + 1] - _values_at_supp_pnts[interval_idx]) + / (_supp_pnts[interval_idx + 1] - _supp_pnts[interval_idx])); - return m * (pnt_to_interpolate - _supp_pnts[interval_idx]) + _values_at_supp_pnts[interval_idx]; + return m * (pnt_to_interpolate - _supp_pnts[interval_idx]) + _values_at_supp_pnts[interval_idx]; } } // end MathLib diff --git a/MathLib/InterpolationAlgorithms/PiecewiseLinearInterpolation.h b/MathLib/InterpolationAlgorithms/PiecewiseLinearInterpolation.h index 81136e6eb73..f0bdbb08568 100644 --- a/MathLib/InterpolationAlgorithms/PiecewiseLinearInterpolation.h +++ b/MathLib/InterpolationAlgorithms/PiecewiseLinearInterpolation.h @@ -25,42 +25,42 @@ namespace MathLib class PiecewiseLinearInterpolation final { public: - /** - * The constructor copies the entries of the vector of supporting points - * \f$(x_0, x_1, \dots, x_n)\f$ and the entries of the vector of values at - * the supporting points \f$(y_0, y_1, \dots, y_n)\f$ where \f$n\f$ - * is the number of entries of the vector. The number of supporting - * points must be the same like the number of values at the supporting - * points. It is assumed that \f$x_j\f$ corresponds to - * \f$y_j\f$ for all \f$j \in [0, n]\f$. - * - * It is not assumed that the supporting points are sorted, i.e. - * \f$x_0 < x_1 < \dots < x_n\f$. It is assumed, that the supporting points - * are pairwise different. The user can set the flag supp_pnts_sorted to - * true, if the supporting points are sorted. This will save some setup - * time. - * @param supporting_points vector of supporting points - * @param values_at_supp_pnts vector of values at the supporting points - * @param supp_pnts_sorted false (default), if it is sure the supporting points are sorted - * one can set the switch to true - */ - PiecewiseLinearInterpolation(std::vector<double>&& supporting_points, - std::vector<double>&& values_at_supp_pnts, - bool supp_pnts_sorted = false); + /** + * The constructor copies the entries of the vector of supporting points + * \f$(x_0, x_1, \dots, x_n)\f$ and the entries of the vector of values at + * the supporting points \f$(y_0, y_1, \dots, y_n)\f$ where \f$n\f$ + * is the number of entries of the vector. The number of supporting + * points must be the same like the number of values at the supporting + * points. It is assumed that \f$x_j\f$ corresponds to + * \f$y_j\f$ for all \f$j \in [0, n]\f$. + * + * It is not assumed that the supporting points are sorted, i.e. + * \f$x_0 < x_1 < \dots < x_n\f$. It is assumed, that the supporting points + * are pairwise different. The user can set the flag supp_pnts_sorted to + * true, if the supporting points are sorted. This will save some setup + * time. + * @param supporting_points vector of supporting points + * @param values_at_supp_pnts vector of values at the supporting points + * @param supp_pnts_sorted false (default), if it is sure the supporting points are sorted + * one can set the switch to true + */ + PiecewiseLinearInterpolation(std::vector<double>&& supporting_points, + std::vector<double>&& values_at_supp_pnts, + bool supp_pnts_sorted = false); - /** - * \brief Calculates the interpolation value. - * @param pnt_to_interpolate The point should be located within the range - * \f$[x_{\min}, x_{\max}]\f$, where \f$x_{\min} = \min_{1 \le j \le n} x_j\f$ and - * \f$x_{\max} = \max_{1 \le j \le n} x_j\f$. Points outside of this interval are - * extrapolated. - * @return The interpolated value. - */ - double getValue(double pnt_to_interpolate) const; + /** + * \brief Calculates the interpolation value. + * @param pnt_to_interpolate The point should be located within the range + * \f$[x_{\min}, x_{\max}]\f$, where \f$x_{\min} = \min_{1 \le j \le n} x_j\f$ and + * \f$x_{\max} = \max_{1 \le j \le n} x_j\f$. Points outside of this interval are + * extrapolated. + * @return The interpolated value. + */ + double getValue(double pnt_to_interpolate) const; private: - std::vector<double> _supp_pnts; - std::vector<double> _values_at_supp_pnts; + std::vector<double> _supp_pnts; + std::vector<double> _values_at_supp_pnts; }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Dense/DenseMatrix-impl.h b/MathLib/LinAlg/Dense/DenseMatrix-impl.h index 0a91b3053a8..10665c5c157 100644 --- a/MathLib/LinAlg/Dense/DenseMatrix-impl.h +++ b/MathLib/LinAlg/Dense/DenseMatrix-impl.h @@ -17,35 +17,35 @@ namespace MathLib template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>::DenseMatrix(IDX_TYPE rows, IDX_TYPE cols) : - _n_rows(rows), _n_cols(cols), _data(new FP_TYPE[_n_rows * _n_cols]) + _n_rows(rows), _n_cols(cols), _data(new FP_TYPE[_n_rows * _n_cols]) {} template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>::DenseMatrix(IDX_TYPE rows, IDX_TYPE cols, - FP_TYPE const& initial_value) : - _n_rows(rows), _n_cols(cols), _data(new FP_TYPE[_n_rows * _n_cols]) + FP_TYPE const& initial_value) : + _n_rows(rows), _n_cols(cols), _data(new FP_TYPE[_n_rows * _n_cols]) { - const IDX_TYPE n(_n_rows * _n_cols); - for (IDX_TYPE k(0); k < n; k++) - _data[k] = initial_value; + const IDX_TYPE n(_n_rows * _n_cols); + for (IDX_TYPE k(0); k < n; k++) + _data[k] = initial_value; } template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>::DenseMatrix (const DenseMatrix<FP_TYPE, IDX_TYPE>& src) : - _n_rows(src.getNRows ()), _n_cols(src.getNCols ()), _data (new FP_TYPE[_n_rows * _n_cols]) + _n_rows(src.getNRows ()), _n_cols(src.getNCols ()), _data (new FP_TYPE[_n_rows * _n_cols]) { - std::copy(src._data, src._data+_n_rows*_n_cols, _data); + std::copy(src._data, src._data+_n_rows*_n_cols, _data); } template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>::DenseMatrix (DenseMatrix<FP_TYPE, IDX_TYPE> &&src) : - _n_rows(src.getNRows()), _n_cols(src.getNCols()) + _n_rows(src.getNRows()), _n_cols(src.getNCols()) { - src._n_rows = 0; - src._n_cols = 0; - _data = src._data; - src._data = nullptr; + src._n_rows = 0; + src._n_cols = 0; + _data = src._data; + src._data = nullptr; } @@ -59,245 +59,245 @@ template <typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>& DenseMatrix<FP_TYPE, IDX_TYPE>::operator=(DenseMatrix<FP_TYPE, IDX_TYPE> const& rhs) { - if (this == &rhs) - return *this; - - if (_n_rows != rhs.getNRows() || _n_cols != rhs.getNCols()) { - std::string msg("DenseMatrix::operator=(DenseMatrix const& rhs), Dimension mismatch, "); - msg += " left hand side: " + std::to_string(_n_rows) + " x " - + std::to_string(_n_cols); - msg += " right hand side: " + std::to_string(rhs.getNRows()) + " x " - + std::to_string(rhs.getNCols()); - throw std::range_error(msg); - return *this; - } - - std::copy(rhs._data, rhs._data + _n_rows * _n_cols, _data); - - return *this; + if (this == &rhs) + return *this; + + if (_n_rows != rhs.getNRows() || _n_cols != rhs.getNCols()) { + std::string msg("DenseMatrix::operator=(DenseMatrix const& rhs), Dimension mismatch, "); + msg += " left hand side: " + std::to_string(_n_rows) + " x " + + std::to_string(_n_cols); + msg += " right hand side: " + std::to_string(rhs.getNRows()) + " x " + + std::to_string(rhs.getNCols()); + throw std::range_error(msg); + return *this; + } + + std::copy(rhs._data, rhs._data + _n_rows * _n_cols, _data); + + return *this; } template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>& DenseMatrix<FP_TYPE, IDX_TYPE>::operator=(DenseMatrix && rhs) { - _n_rows = rhs._n_rows; - _n_cols = rhs._n_cols; - _data = rhs._data; - - rhs._n_rows = 0; - rhs._n_cols = 0; - rhs._data = nullptr; - return *this; + _n_rows = rhs._n_rows; + _n_cols = rhs._n_cols; + _data = rhs._data; + + rhs._n_rows = 0; + rhs._n_cols = 0; + rhs._data = nullptr; + return *this; } template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>& DenseMatrix<FP_TYPE, IDX_TYPE>::operator=(FP_TYPE const& v) { - std::fill(this->_data, this->_data + this->_n_rows * this->_n_cols, v); - return *this; + std::fill(this->_data, this->_data + this->_n_rows * this->_n_cols, v); + return *this; } template<typename FP_TYPE, typename IDX_TYPE> void DenseMatrix<FP_TYPE, IDX_TYPE>::axpy(FP_TYPE alpha, const FP_TYPE* x, FP_TYPE beta, - FP_TYPE* y) const + FP_TYPE* y) const { - for (IDX_TYPE i(0); i < _n_rows; i++) { - y[i] += beta * y[i]; - for (IDX_TYPE j(0); j < _n_cols; j++) { - y[i] += alpha * _data[address(i, j)] * x[j]; - } - } + for (IDX_TYPE i(0); i < _n_rows; i++) { + y[i] += beta * y[i]; + for (IDX_TYPE j(0); j < _n_cols; j++) { + y[i] += alpha * _data[address(i, j)] * x[j]; + } + } } template<typename FP_TYPE, typename IDX_TYPE> FP_TYPE* DenseMatrix<FP_TYPE, IDX_TYPE>::operator* (FP_TYPE* const& x) const { - return this->operator*(static_cast<FP_TYPE const*>(x)); + return this->operator*(static_cast<FP_TYPE const*>(x)); } template<typename FP_TYPE, typename IDX_TYPE> FP_TYPE* DenseMatrix<FP_TYPE, IDX_TYPE>::operator* (FP_TYPE const* const& x) const { - FP_TYPE *y(new FP_TYPE[_n_rows]); - for (IDX_TYPE i(0); i < _n_rows; i++) { - y[i] = 0.0; - for (IDX_TYPE j(0); j < _n_cols; j++) { - y[i] += _data[address(i, j)] * x[j]; - } - } - - return y; + FP_TYPE *y(new FP_TYPE[_n_rows]); + for (IDX_TYPE i(0); i < _n_rows; i++) { + y[i] = 0.0; + for (IDX_TYPE j(0); j < _n_cols; j++) { + y[i] += _data[address(i, j)] * x[j]; + } + } + + return y; } template<typename FP_TYPE, typename IDX_TYPE> template <typename V> V DenseMatrix<FP_TYPE, IDX_TYPE>::operator* (V const& x) const { - V y(_n_rows); - for (IDX_TYPE i(0); i < _n_rows; i++) { - y[i] = 0.0; - for (IDX_TYPE j(0); j < _n_cols; j++) { - y[i] += _data[address(i, j)] * x[j]; - } - } - - return y; + V y(_n_rows); + for (IDX_TYPE i(0); i < _n_rows; i++) { + y[i] = 0.0; + for (IDX_TYPE j(0); j < _n_cols; j++) { + y[i] += _data[address(i, j)] * x[j]; + } + } + + return y; } template<typename FP_TYPE, typename IDX_TYPE> MathLib::Vector3 DenseMatrix<FP_TYPE, IDX_TYPE>::operator*(MathLib::Vector3 const& x) const { - assert(_n_rows>2); + assert(_n_rows>2); - MathLib::Vector3 y; - for (IDX_TYPE i(0); i < _n_rows; i++) { - y[i] = 0.0; - for (IDX_TYPE j(0); j < _n_cols; j++) { - y[i] += _data[address(i, j)] * x[j]; - } - } + MathLib::Vector3 y; + for (IDX_TYPE i(0); i < _n_rows; i++) { + y[i] = 0.0; + for (IDX_TYPE j(0); j < _n_cols; j++) { + y[i] += _data[address(i, j)] * x[j]; + } + } - return y; + return y; } template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>* DenseMatrix<FP_TYPE, IDX_TYPE>::operator+(const DenseMatrix<FP_TYPE, IDX_TYPE>& mat) const { - // make sure the two matrices have the same dimension. - if (_n_rows != mat.getNRows() || _n_cols != mat.getNCols()) - throw std::range_error("DenseMatrix::operator+, illegal matrix size!"); - - DenseMatrix<FP_TYPE, IDX_TYPE>* y(new DenseMatrix<FP_TYPE, IDX_TYPE>(_n_rows, _n_cols)); - for (IDX_TYPE i = 0; i < _n_rows; i++) { - for (IDX_TYPE j = 0; j < _n_cols; j++) { - (*y)(i, j) = _data[address(i, j)] + mat(i, j); - } - } - - return y; + // make sure the two matrices have the same dimension. + if (_n_rows != mat.getNRows() || _n_cols != mat.getNCols()) + throw std::range_error("DenseMatrix::operator+, illegal matrix size!"); + + DenseMatrix<FP_TYPE, IDX_TYPE>* y(new DenseMatrix<FP_TYPE, IDX_TYPE>(_n_rows, _n_cols)); + for (IDX_TYPE i = 0; i < _n_rows; i++) { + for (IDX_TYPE j = 0; j < _n_cols; j++) { + (*y)(i, j) = _data[address(i, j)] + mat(i, j); + } + } + + return y; } template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>* DenseMatrix<FP_TYPE, IDX_TYPE>::operator-(const DenseMatrix<FP_TYPE, IDX_TYPE>& mat) const { - // make sure the two matrices have the same dimension. - if (_n_rows != mat.getNRows() || _n_cols != mat.getNCols()) - throw std::range_error("DenseMatrix::operator-, illegal matrix size!"); - - DenseMatrix<FP_TYPE, IDX_TYPE>* y(new DenseMatrix<FP_TYPE, IDX_TYPE>(_n_rows, _n_cols)); - for (IDX_TYPE i = 0; i < _n_rows; i++) { - for (IDX_TYPE j = 0; j < _n_cols; j++) { - (*y)(i, j) = _data[address(i, j)] - mat(i, j); - } - } - - return y; + // make sure the two matrices have the same dimension. + if (_n_rows != mat.getNRows() || _n_cols != mat.getNCols()) + throw std::range_error("DenseMatrix::operator-, illegal matrix size!"); + + DenseMatrix<FP_TYPE, IDX_TYPE>* y(new DenseMatrix<FP_TYPE, IDX_TYPE>(_n_rows, _n_cols)); + for (IDX_TYPE i = 0; i < _n_rows; i++) { + for (IDX_TYPE j = 0; j < _n_cols; j++) { + (*y)(i, j) = _data[address(i, j)] - mat(i, j); + } + } + + return y; } template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>* DenseMatrix<FP_TYPE, IDX_TYPE>::operator*(const DenseMatrix<FP_TYPE, IDX_TYPE>& mat) const { - // make sure the two matrices have the same dimension. - if (_n_cols != mat.getNRows()) - throw std::range_error( - "DenseMatrix::operator*, number of rows and cols should be the same!"); - - IDX_TYPE y_cols(mat.getNCols()); - DenseMatrix<FP_TYPE, IDX_TYPE>* y( - new DenseMatrix<FP_TYPE, IDX_TYPE>(_n_rows, y_cols, FP_TYPE(0))); - - for (IDX_TYPE i = 0; i < _n_rows; i++) { - for (IDX_TYPE j = 0; j < y_cols; j++) { - for (IDX_TYPE k = 0; k < _n_cols; k++) - (*y)(i, j) += _data[address(i, k)] * mat(k, j); - } - } - - return y; + // make sure the two matrices have the same dimension. + if (_n_cols != mat.getNRows()) + throw std::range_error( + "DenseMatrix::operator*, number of rows and cols should be the same!"); + + IDX_TYPE y_cols(mat.getNCols()); + DenseMatrix<FP_TYPE, IDX_TYPE>* y( + new DenseMatrix<FP_TYPE, IDX_TYPE>(_n_rows, y_cols, FP_TYPE(0))); + + for (IDX_TYPE i = 0; i < _n_rows; i++) { + for (IDX_TYPE j = 0; j < y_cols; j++) { + for (IDX_TYPE k = 0; k < _n_cols; k++) + (*y)(i, j) += _data[address(i, k)] * mat(k, j); + } + } + + return y; } template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>* DenseMatrix<FP_TYPE, IDX_TYPE>::transpose() const { - DenseMatrix<FP_TYPE, IDX_TYPE>* y(new DenseMatrix<FP_TYPE, IDX_TYPE>(_n_cols, _n_rows)); - - for (IDX_TYPE i = 0; i < _n_rows; i++) { - for (IDX_TYPE j = 0; j < _n_cols; j++) { - (*y)(j, i) = _data[address(i, j)]; - } - } - return y; + DenseMatrix<FP_TYPE, IDX_TYPE>* y(new DenseMatrix<FP_TYPE, IDX_TYPE>(_n_cols, _n_rows)); + + for (IDX_TYPE i = 0; i < _n_rows; i++) { + for (IDX_TYPE j = 0; j < _n_cols; j++) { + (*y)(j, i) = _data[address(i, j)]; + } + } + return y; } template<typename FP_TYPE, typename IDX_TYPE> void DenseMatrix<FP_TYPE, IDX_TYPE>::transposeInPlace() { - if (_n_rows==_n_cols) { // square matrix - for (IDX_TYPE i = 0; i < _n_rows; i++) - for (IDX_TYPE j = i+1; j < _n_cols; j++) - std::swap(_data[address(i, j)], _data[address(j, i)]); - } else { // non-square matrix - const DenseMatrix<FP_TYPE, IDX_TYPE> org(*this); - std::swap(_n_rows, _n_cols); - for (IDX_TYPE i = 0; i < _n_rows; i++) { - for (IDX_TYPE j = 0; j < _n_cols; j++) { - _data[address(i, j)] = org(j, i); - } - } - } + if (_n_rows==_n_cols) { // square matrix + for (IDX_TYPE i = 0; i < _n_rows; i++) + for (IDX_TYPE j = i+1; j < _n_cols; j++) + std::swap(_data[address(i, j)], _data[address(j, i)]); + } else { // non-square matrix + const DenseMatrix<FP_TYPE, IDX_TYPE> org(*this); + std::swap(_n_rows, _n_cols); + for (IDX_TYPE i = 0; i < _n_rows; i++) { + for (IDX_TYPE j = 0; j < _n_cols; j++) { + _data[address(i, j)] = org(j, i); + } + } + } } template<typename FP_TYPE, typename IDX_TYPE> DenseMatrix<FP_TYPE, IDX_TYPE>* DenseMatrix<FP_TYPE, IDX_TYPE>::getSubMatrix( - IDX_TYPE b_row, IDX_TYPE b_col, - IDX_TYPE e_row, IDX_TYPE e_col) const + IDX_TYPE b_row, IDX_TYPE b_col, + IDX_TYPE e_row, IDX_TYPE e_col) const { - if (b_row >= e_row | b_col >= e_col) - throw std::range_error("DenseMatrix::getSubMatrix() illegal sub matrix"); - if (e_row > _n_rows | e_col > _n_cols) - throw std::range_error("DenseMatrix::getSubMatrix() illegal sub matrix"); - - DenseMatrix<FP_TYPE, IDX_TYPE>* y( - new DenseMatrix<FP_TYPE, IDX_TYPE>(e_row - b_row, e_col - b_col)); - for (IDX_TYPE i = b_row; i < e_row; i++) { - for (IDX_TYPE j = b_col; j < e_col; j++) { - (*y)(i - b_row, j - b_col) = _data[address(i, j)]; - } - } - return y; + if (b_row >= e_row | b_col >= e_col) + throw std::range_error("DenseMatrix::getSubMatrix() illegal sub matrix"); + if (e_row > _n_rows | e_col > _n_cols) + throw std::range_error("DenseMatrix::getSubMatrix() illegal sub matrix"); + + DenseMatrix<FP_TYPE, IDX_TYPE>* y( + new DenseMatrix<FP_TYPE, IDX_TYPE>(e_row - b_row, e_col - b_col)); + for (IDX_TYPE i = b_row; i < e_row; i++) { + for (IDX_TYPE j = b_col; j < e_col; j++) { + (*y)(i - b_row, j - b_col) = _data[address(i, j)]; + } + } + return y; } template<typename FP_TYPE, typename IDX_TYPE> void DenseMatrix<FP_TYPE, IDX_TYPE>::setSubMatrix(IDX_TYPE b_row, IDX_TYPE b_col, - const DenseMatrix<FP_TYPE, IDX_TYPE>& sub_mat) + const DenseMatrix<FP_TYPE, IDX_TYPE>& sub_mat) { - if (b_row + sub_mat.getNRows() > _n_rows | b_col + sub_mat.getNCols() > _n_cols) - throw std::range_error("DenseMatrix::setSubMatrix() sub matrix to big"); - - for (IDX_TYPE i = 0; i < sub_mat.getNRows(); i++) { - for (IDX_TYPE j = 0; j < sub_mat.getNCols(); j++) { - _data[address(i + b_row, j + b_col)] = sub_mat(i, j); - } - } + if (b_row + sub_mat.getNRows() > _n_rows | b_col + sub_mat.getNCols() > _n_cols) + throw std::range_error("DenseMatrix::setSubMatrix() sub matrix to big"); + + for (IDX_TYPE i = 0; i < sub_mat.getNRows(); i++) { + for (IDX_TYPE j = 0; j < sub_mat.getNCols(); j++) { + _data[address(i + b_row, j + b_col)] = sub_mat(i, j); + } + } } template<typename FP_TYPE, typename IDX_TYPE> FP_TYPE& DenseMatrix<FP_TYPE, IDX_TYPE>::operator() (IDX_TYPE row, IDX_TYPE col) { - assert((row < _n_rows) && (col < _n_cols)); - return _data [address(row,col)]; + assert((row < _n_rows) && (col < _n_cols)); + return _data [address(row,col)]; } @@ -305,44 +305,44 @@ template<typename FP_TYPE, typename IDX_TYPE> FP_TYPE const& DenseMatrix<FP_TYPE, IDX_TYPE>::operator() (IDX_TYPE row, IDX_TYPE col) const { - assert((row < _n_rows) && (col < _n_cols)); - return _data[address(row, col)]; + assert((row < _n_rows) && (col < _n_cols)); + return _data[address(row, col)]; } template <typename FP_TYPE, typename IDX_TYPE> void DenseMatrix<FP_TYPE, IDX_TYPE>::write (std::ostream &out) const { - out << _n_rows << " " << _n_cols << "\n"; - for (IDX_TYPE i = 0; i < _n_rows; i++) { - for (IDX_TYPE j = 0; j < _n_cols; j++) { - out << _data[address(i, j)] << "\t"; - } - out << "\n"; - } + out << _n_rows << " " << _n_cols << "\n"; + for (IDX_TYPE i = 0; i < _n_rows; i++) { + for (IDX_TYPE j = 0; j < _n_cols; j++) { + out << _data[address(i, j)] << "\t"; + } + out << "\n"; + } } template <typename FP_TYPE, typename IDX_TYPE> void DenseMatrix<FP_TYPE, IDX_TYPE>::setIdentity() { - (*this) = 0.0; - const IDX_TYPE n_square_rows = std::min(_n_rows, _n_cols); - for (IDX_TYPE i=0; i<n_square_rows; i++) - _data[address(i,i)] = 1.0; + (*this) = 0.0; + const IDX_TYPE n_square_rows = std::min(_n_rows, _n_cols); + for (IDX_TYPE i=0; i<n_square_rows; i++) + _data[address(i,i)] = 1.0; } template <typename FP_TYPE, typename IDX_TYPE> FP_TYPE sqrFrobNrm (const DenseMatrix<FP_TYPE, IDX_TYPE> &mat) { - FP_TYPE nrm(static_cast<FP_TYPE>(0)); - IDX_TYPE i, j; - for (j = 0; j < mat.getNCols(); j++) - for (i = 0; i < mat.getNRows(); i++) - nrm += mat(i, j) * mat(i, j); + FP_TYPE nrm(static_cast<FP_TYPE>(0)); + IDX_TYPE i, j; + for (j = 0; j < mat.getNCols(); j++) + for (i = 0; i < mat.getNRows(); i++) + nrm += mat(i, j) * mat(i, j); - return nrm; + return nrm; } } // end namespace MathLib diff --git a/MathLib/LinAlg/Dense/DenseMatrix.h b/MathLib/LinAlg/Dense/DenseMatrix.h index 096a3c92611..0b80dc1e3f4 100644 --- a/MathLib/LinAlg/Dense/DenseMatrix.h +++ b/MathLib/LinAlg/Dense/DenseMatrix.h @@ -30,8 +30,8 @@ namespace MathLib { template <typename FP_TYPE, typename IDX_TYPE = std::size_t> class DenseMatrix { public: - typedef FP_TYPE FP_T; - typedef IDX_TYPE IDX_T; + typedef FP_TYPE FP_T; + typedef IDX_TYPE IDX_T; public: /// Dense square matrix constructor. @@ -144,36 +144,36 @@ public: */ void write (std::ostream& out) const; - /** - * get the number of rows - * @return the number of rows - */ - IDX_TYPE getNRows () const { return _n_rows; } - /** - * get the number of columns - * @return the number of columns - */ - IDX_TYPE getNCols () const { return _n_cols; } - - /** - * get the number of entries in the matrix - */ - IDX_TYPE size() const { return _n_rows*_n_cols; } - - /** - * set the identity matrix - */ - void setIdentity(); + /** + * get the number of rows + * @return the number of rows + */ + IDX_TYPE getNRows () const { return _n_rows; } + /** + * get the number of columns + * @return the number of columns + */ + IDX_TYPE getNCols () const { return _n_cols; } + + /** + * get the number of entries in the matrix + */ + IDX_TYPE size() const { return _n_rows*_n_cols; } + + /** + * set the identity matrix + */ + void setIdentity(); protected: - /** - * the number of rows - */ - IDX_TYPE _n_rows; - /** - * the number of columns - */ - IDX_TYPE _n_cols; + /** + * the number of rows + */ + IDX_TYPE _n_rows; + /** + * the number of columns + */ + IDX_TYPE _n_cols; // zero based addressing, but Fortran storage layout //inline IDX_TYPE address(IDX_TYPE i, IDX_TYPE j) const { return j*rows+i; } @@ -187,8 +187,8 @@ protected: template <typename FP_TYPE, typename IDX_TYPE> std::ostream& operator<< (std::ostream &os, const DenseMatrix<FP_TYPE, IDX_TYPE> &mat) { - mat.write (os); - return os; + mat.write (os); + return os; } } // end namespace MathLib diff --git a/MathLib/LinAlg/Eigen/EigenTools.cpp b/MathLib/LinAlg/Eigen/EigenTools.cpp index 0f4b6a73e54..bdef51d706b 100644 --- a/MathLib/LinAlg/Eigen/EigenTools.cpp +++ b/MathLib/LinAlg/Eigen/EigenTools.cpp @@ -17,8 +17,8 @@ namespace MathLib { void applyKnownSolution(EigenMatrix &A_, EigenVector &b_, EigenVector &/*x*/, - const std::vector<EigenMatrix::IndexType> &vec_knownX_id, - const std::vector<double> &vec_knownX_x, double /*penalty_scaling*/) + const std::vector<EigenMatrix::IndexType> &vec_knownX_id, + const std::vector<double> &vec_knownX_x, double /*penalty_scaling*/) { using SpMat = EigenMatrix::RawMatrixType; static_assert(SpMat::IsRowMajor, "matrix is assumed to be row major!"); diff --git a/MathLib/LinAlg/Eigen/EigenTools.h b/MathLib/LinAlg/Eigen/EigenTools.h index cd5f936a610..9d533e70de4 100644 --- a/MathLib/LinAlg/Eigen/EigenTools.h +++ b/MathLib/LinAlg/Eigen/EigenTools.h @@ -30,19 +30,19 @@ class EigenVector; * entries to enforce some conditions */ void applyKnownSolution(EigenMatrix &A, EigenVector &b, EigenVector &/*x*/, - const std::vector<EigenMatrix::IndexType> &_vec_knownX_id, - const std::vector<double> &_vec_knownX_x, double penalty_scaling = 1e+10); + const std::vector<EigenMatrix::IndexType> &_vec_knownX_id, + const std::vector<double> &_vec_knownX_x, double penalty_scaling = 1e+10); inline void applyKnownSolution(Eigen::MatrixXd &A, Eigen::VectorXd &b, Eigen::VectorXd &/*x*/, - const std::vector<Eigen::MatrixXd::Index> &_vec_knownX_id, - const std::vector<double> &_vec_knownX_x, double penalty_scaling = 1e+10) + const std::vector<Eigen::MatrixXd::Index> &_vec_knownX_id, + const std::vector<double> &_vec_knownX_x, double penalty_scaling = 1e+10) { - (void) A; (void) b; (void) _vec_knownX_id; (void) _vec_knownX_x; - (void) penalty_scaling; + (void) A; (void) b; (void) _vec_knownX_id; (void) _vec_knownX_x; + (void) penalty_scaling; - ERR("Method not implemented."); // TODO implement - std::abort(); + ERR("Method not implemented."); // TODO implement + std::abort(); } } // MathLib diff --git a/MathLib/LinAlg/FinalizeMatrixAssembly.h b/MathLib/LinAlg/FinalizeMatrixAssembly.h index 5a292dcafc4..6b80284b819 100644 --- a/MathLib/LinAlg/FinalizeMatrixAssembly.h +++ b/MathLib/LinAlg/FinalizeMatrixAssembly.h @@ -20,7 +20,7 @@ namespace MathLib template <typename MAT_T> bool finalizeMatrixAssembly(MAT_T &) { - return true; + return true; } } // MathLib diff --git a/MathLib/LinAlg/Lis/LisMatrix.h b/MathLib/LinAlg/Lis/LisMatrix.h index 15e0092102c..ae32fb5cd9d 100644 --- a/MathLib/LinAlg/Lis/LisMatrix.h +++ b/MathLib/LinAlg/Lis/LisMatrix.h @@ -157,8 +157,8 @@ private: LIS_MATRIX _AA; LIS_VECTOR _diag; bool _is_assembled; - IndexType _is; ///< location where the partial matrix _AA starts in global matrix. - IndexType _ie; ///< location where the partial matrix _AA ends in global matrix. + IndexType _is; ///< location where the partial matrix _AA starts in global matrix. + IndexType _ie; ///< location where the partial matrix _AA ends in global matrix. bool _use_external_arrays; // friend function diff --git a/MathLib/LinAlg/Lis/LisOption.h b/MathLib/LinAlg/Lis/LisOption.h index c67421c268a..37e3822f133 100644 --- a/MathLib/LinAlg/Lis/LisOption.h +++ b/MathLib/LinAlg/Lis/LisOption.h @@ -40,20 +40,20 @@ namespace MathLib */ struct LisOption { - LisOption(BaseLib::ConfigTree const* const options) - { - if (options) { - ignoreOtherLinearSolvers(*options, "lis"); - if (auto s = options->getConfParamOptional<std::string>("lis")) { - if (!s->empty()) { - _option_string += " " + *s; - INFO("Lis options: \"%s\"", _option_string.c_str()); - } - } - } - } + LisOption(BaseLib::ConfigTree const* const options) + { + if (options) { + ignoreOtherLinearSolvers(*options, "lis"); + if (auto s = options->getConfParamOptional<std::string>("lis")) { + if (!s->empty()) { + _option_string += " " + *s; + INFO("Lis options: \"%s\"", _option_string.c_str()); + } + } + } + } - std::string _option_string = "-initxzeros 0"; + std::string _option_string = "-initxzeros 0"; }; } diff --git a/MathLib/LinAlg/Lis/LisTools.cpp b/MathLib/LinAlg/Lis/LisTools.cpp index 2439b3163a9..8a3e024ccc4 100644 --- a/MathLib/LinAlg/Lis/LisTools.cpp +++ b/MathLib/LinAlg/Lis/LisTools.cpp @@ -36,77 +36,77 @@ namespace detail /// sorts the columns and values, accordingly. MathLib::CRSMatrix<double, typename LisMatrix::IndexType>* lis2crs(LisMatrix &a) { - using IndexType = LisMatrix::IndexType; - - LIS_MATRIX &A = a.getRawMatrix(); - - IndexType const n_rows(A->n); // number of rows - IndexType *iA(new IndexType[n_rows+1]); // row ptr array - iA[0] = 0; - for (LIS_INT k=1; k<n_rows+1; ++k) { - iA[k] = iA[k-1] + A->w_row[k-1 - A->is]; - } - - IndexType *jA(new IndexType[iA[n_rows]]); // column indices array - double *entries(new double[iA[n_rows]]); - for (IndexType r(0); r<n_rows; ++r) { - IndexType const beg_idx(iA[r]); - IndexType const end_idx(iA[r+1]); - for (IndexType j(beg_idx); j<end_idx; ++j) { - jA[j] = A->w_index[r-A->is][j-beg_idx]; - entries[j] = A->w_value[r-A->is][j-beg_idx]; - } - } - - for (IndexType r(0); r<n_rows; ++r) { - IndexType const beg_idx(iA[r]); - IndexType const end_idx(iA[r+1]); - // sort the column entries of the row - BaseLib::quicksort(jA, beg_idx, end_idx, entries); - } - - return new MathLib::CRSMatrix<double,IndexType>(A->n, iA, jA, entries); + using IndexType = LisMatrix::IndexType; + + LIS_MATRIX &A = a.getRawMatrix(); + + IndexType const n_rows(A->n); // number of rows + IndexType *iA(new IndexType[n_rows+1]); // row ptr array + iA[0] = 0; + for (LIS_INT k=1; k<n_rows+1; ++k) { + iA[k] = iA[k-1] + A->w_row[k-1 - A->is]; + } + + IndexType *jA(new IndexType[iA[n_rows]]); // column indices array + double *entries(new double[iA[n_rows]]); + for (IndexType r(0); r<n_rows; ++r) { + IndexType const beg_idx(iA[r]); + IndexType const end_idx(iA[r+1]); + for (IndexType j(beg_idx); j<end_idx; ++j) { + jA[j] = A->w_index[r-A->is][j-beg_idx]; + entries[j] = A->w_value[r-A->is][j-beg_idx]; + } + } + + for (IndexType r(0); r<n_rows; ++r) { + IndexType const beg_idx(iA[r]); + IndexType const end_idx(iA[r+1]); + // sort the column entries of the row + BaseLib::quicksort(jA, beg_idx, end_idx, entries); + } + + return new MathLib::CRSMatrix<double,IndexType>(A->n, iA, jA, entries); } // This function resets the the column indices and the entries, respectively. // The LIS_MATRIX must have reserved enough memory for each row already! void crs2lis( - MathLib::CRSMatrix<double, typename LisMatrix::IndexType> const& mat, - LIS_MATRIX &A) + MathLib::CRSMatrix<double, typename LisMatrix::IndexType> const& mat, + LIS_MATRIX &A) { - LisMatrix::IndexType const*const jA(mat.getColIdxArray()); - double * entries(const_cast<double*>(mat.getEntryArray())); - - // reset the entries in the lis matrix - LisMatrix::IndexType cnt(0); - for (LIS_INT row_i = 0; row_i < A->n; ++row_i) { - for (LIS_INT j = 0; j < A->w_row[row_i - A->is]; ++j) { - A->w_index[row_i-A->is][j] = jA[cnt]; - A->w_value[row_i-A->is][j] = entries[cnt]; - cnt++; - } - } + LisMatrix::IndexType const*const jA(mat.getColIdxArray()); + double * entries(const_cast<double*>(mat.getEntryArray())); + + // reset the entries in the lis matrix + LisMatrix::IndexType cnt(0); + for (LIS_INT row_i = 0; row_i < A->n; ++row_i) { + for (LIS_INT j = 0; j < A->w_row[row_i - A->is]; ++j) { + A->w_index[row_i-A->is][j] = jA[cnt]; + A->w_value[row_i-A->is][j] = entries[cnt]; + cnt++; + } + } } } // end namespace detail void applyKnownSolution(LisMatrix &eqsA, LisVector &eqsRHS, LisVector &/*eqsX*/, - const std::vector<LisMatrix::IndexType> &input_rows, - const std::vector<double> &input_vals) + const std::vector<LisMatrix::IndexType> &input_rows, + const std::vector<double> &input_vals) { - // unfortunatly the input is not sorted => copy and sort - std::vector<LisMatrix::IndexType> rows(input_rows); - std::vector<double> vals(input_vals); - BaseLib::quicksort(rows, 0, rows.size(), vals); + // unfortunatly the input is not sorted => copy and sort + std::vector<LisMatrix::IndexType> rows(input_rows); + std::vector<double> vals(input_vals); + BaseLib::quicksort(rows, 0, rows.size(), vals); - MathLib::CRSMatrix<double, typename LisMatrix::IndexType> *crs_mat( - MathLib::detail::lis2crs(eqsA)); + MathLib::CRSMatrix<double, typename LisMatrix::IndexType> *crs_mat( + MathLib::detail::lis2crs(eqsA)); - // The following function is defined in CRSTools-impl.h - applyKnownSolution(crs_mat, eqsRHS, input_rows, input_vals); + // The following function is defined in CRSTools-impl.h + applyKnownSolution(crs_mat, eqsRHS, input_rows, input_vals); - detail::crs2lis(*crs_mat, eqsA.getRawMatrix()); + detail::crs2lis(*crs_mat, eqsA.getRawMatrix()); - delete crs_mat; + delete crs_mat; } } // MathLib diff --git a/MathLib/LinAlg/Lis/LisTools.h b/MathLib/LinAlg/Lis/LisTools.h index c5c8ec49bc0..c20b6f85def 100644 --- a/MathLib/LinAlg/Lis/LisTools.h +++ b/MathLib/LinAlg/Lis/LisTools.h @@ -36,8 +36,8 @@ class LisVector; * @param vals a vector of known solutions */ void applyKnownSolution(LisMatrix &eqsA, LisVector &eqsRHS, LisVector &/*eqsX*/, - const std::vector<LisMatrix::IndexType> &rows, - const std::vector<double> &vals); + const std::vector<LisMatrix::IndexType> &rows, + const std::vector<double> &vals); } // MathLib diff --git a/MathLib/LinAlg/Lis/LisVector.cpp b/MathLib/LinAlg/Lis/LisVector.cpp index 7649a2f1d5f..b841046e29f 100644 --- a/MathLib/LinAlg/Lis/LisVector.cpp +++ b/MathLib/LinAlg/Lis/LisVector.cpp @@ -21,64 +21,64 @@ namespace MathLib { LisVector::LisVector(std::size_t length) { - lis_vector_create(0, &_vec); - lis_vector_set_size(_vec, 0, length); + lis_vector_create(0, &_vec); + lis_vector_set_size(_vec, 0, length); } LisVector::LisVector(std::size_t length, double* data) { - lis_vector_create(0, &_vec); - lis_vector_set_size(_vec, 0, length); - for (std::size_t i = 0; i < length; i++) - lis_vector_set_value(LIS_INS_VALUE, i, data[i], _vec); + lis_vector_create(0, &_vec); + lis_vector_set_size(_vec, 0, length); + for (std::size_t i = 0; i < length; i++) + lis_vector_set_value(LIS_INS_VALUE, i, data[i], _vec); } LisVector::LisVector(LisVector const& src) { - lis_vector_duplicate(src._vec, &_vec); - lis_vector_copy(src._vec, _vec); + lis_vector_duplicate(src._vec, &_vec); + lis_vector_copy(src._vec, _vec); } LisVector::~LisVector() { - lis_vector_destroy(_vec); + lis_vector_destroy(_vec); } LisVector& LisVector::operator=(const LisVector& src) { - lis_vector_copy(src._vec, _vec); - return *this; + lis_vector_copy(src._vec, _vec); + return *this; } void LisVector::operator+=(const LisVector& v) { - lis_vector_axpy(1.0, v._vec, _vec); + lis_vector_axpy(1.0, v._vec, _vec); } void LisVector::operator-=(const LisVector& v) { - lis_vector_axpy(-1.0, v._vec, _vec); + lis_vector_axpy(-1.0, v._vec, _vec); } LisVector& LisVector::operator=(double v) { - lis_vector_set_all(v, _vec); - return *this; + lis_vector_set_all(v, _vec); + return *this; } std::size_t LisVector::size() const { - IndexType dummy; - IndexType size; - int const ierr = lis_vector_get_size(_vec, &dummy, &size); - checkLisError(ierr); - assert(size >= 0); // For safe implicit conversion to std::size_t. - return size; + IndexType dummy; + IndexType size; + int const ierr = lis_vector_get_size(_vec, &dummy, &size); + checkLisError(ierr); + assert(size >= 0); // For safe implicit conversion to std::size_t. + return size; } void LisVector::write(const std::string& filename) const { - lis_output_vector(_vec, LIS_FMT_PLAIN, const_cast<char*>(filename.c_str())); + lis_output_vector(_vec, LIS_FMT_PLAIN, const_cast<char*>(filename.c_str())); } } // MathLib diff --git a/MathLib/LinAlg/Lis/LisVector.h b/MathLib/LinAlg/Lis/LisVector.h index f98333cac3f..202674ee0bb 100644 --- a/MathLib/LinAlg/Lis/LisVector.h +++ b/MathLib/LinAlg/Lis/LisVector.h @@ -28,97 +28,97 @@ namespace MathLib class LisVector { public: - using IndexType = LIS_INT; + using IndexType = LIS_INT; public: - /** - * Constructor for initialization of the number of rows - * @param length number of rows - */ - explicit LisVector(std::size_t length); - - /** - * Constructor using the given raw data - * @param length the length of the vector - * @param data the raw data - */ - LisVector(std::size_t length, double* data); - - /// copy constructor - LisVector(LisVector const& src); - - /** - * - */ - virtual ~LisVector(); - - /// return a vector length - std::size_t size() const; - - /// return a start index of the active data range - std::size_t getRangeBegin() const { return 0; } - /// return an end index of the active data range - std::size_t getRangeEnd() const { return this->size(); } - /// set all values in this vector - LisVector& operator=(double v); - - // TODO preliminary - void setZero() { *this = 0.0; } - - /// access entry - double operator[](IndexType rowId) const { return get(rowId); } - /// get entry - double get(IndexType rowId) const - { - double v = .0; - lis_vector_get_value(_vec, rowId, &v); - return v; - } - - /// set entry - void set(IndexType rowId, double v) - { - lis_vector_set_value(LIS_INS_VALUE, rowId, v, _vec); - } - - /// add entry - void add(IndexType rowId, double v) - { - lis_vector_set_value(LIS_ADD_VALUE, rowId, v, _vec); - } - - /// printout this equation for debugging - void write(const std::string& filename) const; - - /// return a raw Lis vector object - LIS_VECTOR& getRawVector() { return _vec; } - /// vector operation: set data - LisVector& operator=(const LisVector& src); - - /// vector operation: add - void operator+=(const LisVector& v); - - /// vector operation: subtract - void operator-=(const LisVector& v); - - /// - template <class T_SUBVEC> - void add(const std::vector<IndexType>& pos, const T_SUBVEC& sub_vec) - { - for (std::size_t i = 0; i < pos.size(); ++i) - { - this->add(pos[i], sub_vec[i]); - } - } - - /// Copy vector values. - void copyValues(std::vector<double>& u) const - { - assert(u.size() == size()); - lis_vector_get_values(_vec, 0, size(), u.data()); - } + /** + * Constructor for initialization of the number of rows + * @param length number of rows + */ + explicit LisVector(std::size_t length); + + /** + * Constructor using the given raw data + * @param length the length of the vector + * @param data the raw data + */ + LisVector(std::size_t length, double* data); + + /// copy constructor + LisVector(LisVector const& src); + + /** + * + */ + virtual ~LisVector(); + + /// return a vector length + std::size_t size() const; + + /// return a start index of the active data range + std::size_t getRangeBegin() const { return 0; } + /// return an end index of the active data range + std::size_t getRangeEnd() const { return this->size(); } + /// set all values in this vector + LisVector& operator=(double v); + + // TODO preliminary + void setZero() { *this = 0.0; } + + /// access entry + double operator[](IndexType rowId) const { return get(rowId); } + /// get entry + double get(IndexType rowId) const + { + double v = .0; + lis_vector_get_value(_vec, rowId, &v); + return v; + } + + /// set entry + void set(IndexType rowId, double v) + { + lis_vector_set_value(LIS_INS_VALUE, rowId, v, _vec); + } + + /// add entry + void add(IndexType rowId, double v) + { + lis_vector_set_value(LIS_ADD_VALUE, rowId, v, _vec); + } + + /// printout this equation for debugging + void write(const std::string& filename) const; + + /// return a raw Lis vector object + LIS_VECTOR& getRawVector() { return _vec; } + /// vector operation: set data + LisVector& operator=(const LisVector& src); + + /// vector operation: add + void operator+=(const LisVector& v); + + /// vector operation: subtract + void operator-=(const LisVector& v); + + /// + template <class T_SUBVEC> + void add(const std::vector<IndexType>& pos, const T_SUBVEC& sub_vec) + { + for (std::size_t i = 0; i < pos.size(); ++i) + { + this->add(pos[i], sub_vec[i]); + } + } + + /// Copy vector values. + void copyValues(std::vector<double>& u) const + { + assert(u.size() == size()); + lis_vector_get_values(_vec, 0, size(), u.data()); + } private: - LIS_VECTOR _vec; + LIS_VECTOR _vec; }; } // MathLib diff --git a/MathLib/LinAlg/Preconditioner/generateDiagPrecond.cpp b/MathLib/LinAlg/Preconditioner/generateDiagPrecond.cpp index bbbc5609444..eda88c34bb4 100644 --- a/MathLib/LinAlg/Preconditioner/generateDiagPrecond.cpp +++ b/MathLib/LinAlg/Preconditioner/generateDiagPrecond.cpp @@ -19,70 +19,70 @@ namespace MathLib { bool generateDiagPrecond (unsigned n, unsigned const*const iA, unsigned const*const jA, - double const*const A, double* diag) + double const*const A, double* diag) { - unsigned idx; // first idx of next row - unsigned c; // column - unsigned j; - bool has_no_diag; + unsigned idx; // first idx of next row + unsigned c; // column + unsigned j; + bool has_no_diag; - for (unsigned r(0); r<n; ++r) { - idx=iA[r+1]; - has_no_diag=true; - for (j=iA[r]; j<idx && has_no_diag; ++j) { - c=jA[j]; - if (c==r) { - has_no_diag=false; - diag[r] = 1.0/A[j]; - } - } - if (j==idx && has_no_diag) { - std::cout << "row " << r << " has no diagonal element " << std::endl; - return false; - } - } - return true; + for (unsigned r(0); r<n; ++r) { + idx=iA[r+1]; + has_no_diag=true; + for (j=iA[r]; j<idx && has_no_diag; ++j) { + c=jA[j]; + if (c==r) { + has_no_diag=false; + diag[r] = 1.0/A[j]; + } + } + if (j==idx && has_no_diag) { + std::cout << "row " << r << " has no diagonal element " << std::endl; + return false; + } + } + return true; } bool generateDiagPrecondRowSum(unsigned n, unsigned const*const iA, double const*const A, double* diag) { - unsigned idx; // first idx of next row - unsigned j; + unsigned idx; // first idx of next row + unsigned j; - for (unsigned r(0); r<n; ++r) { - diag[r] = 0.0; - idx=iA[r+1]; - for (j=iA[r]; j<idx; ++j) { - diag[r] += fabs(A[j]); - } - if (fabs(diag[r]) < std::numeric_limits<double>::epsilon()) { - std::cout << "row " << r << " has only very small entries" << std::endl; - return false; - } - diag[r] = 1.0/diag[r]; - } - return true; + for (unsigned r(0); r<n; ++r) { + diag[r] = 0.0; + idx=iA[r+1]; + for (j=iA[r]; j<idx; ++j) { + diag[r] += fabs(A[j]); + } + if (fabs(diag[r]) < std::numeric_limits<double>::epsilon()) { + std::cout << "row " << r << " has only very small entries" << std::endl; + return false; + } + diag[r] = 1.0/diag[r]; + } + return true; } bool generateDiagPrecondRowMax(unsigned n, unsigned const*const iA, double const*const A, double* diag) { - unsigned idx; // first idx of next row - unsigned j; + unsigned idx; // first idx of next row + unsigned j; - for (unsigned r(0); r<n; ++r) { - idx=iA[r+1]; - diag[r] = A[idx]; - for (j=iA[r]; j<idx; ++j) { - if (A[j] > diag[r]) - diag[r] = A[j]; - } - if (fabs(diag[r]) < std::numeric_limits<double>::epsilon()) { - std::cout << "the maximum entry of row " << r << " has only very small value" << std::endl; - return false; - } - diag[r] = 1.0/diag[r]; - } - return true; + for (unsigned r(0); r<n; ++r) { + idx=iA[r+1]; + diag[r] = A[idx]; + for (j=iA[r]; j<idx; ++j) { + if (A[j] > diag[r]) + diag[r] = A[j]; + } + if (fabs(diag[r]) < std::numeric_limits<double>::epsilon()) { + std::cout << "the maximum entry of row " << r << " has only very small value" << std::endl; + return false; + } + diag[r] = 1.0/diag[r]; + } + return true; } } // end namespace MathLib diff --git a/MathLib/LinAlg/Preconditioner/generateDiagPrecond.h b/MathLib/LinAlg/Preconditioner/generateDiagPrecond.h index 2dd4412f54e..99747a8a2c8 100644 --- a/MathLib/LinAlg/Preconditioner/generateDiagPrecond.h +++ b/MathLib/LinAlg/Preconditioner/generateDiagPrecond.h @@ -27,7 +27,7 @@ namespace MathLib { * @return true, if all diagonal entries are distinct from zero, else false */ bool generateDiagPrecond(unsigned n, unsigned const*const iA, unsigned const*const jA, - double const*const A, double* diag); + double const*const A, double* diag); /** * diagonal preconditioner \f$P_{ii} = \left(\sum_{j} |a_{ij}|\right)^{-1}\f$ associated with \f$n \times n\f$ matrix \f$A\f$ @@ -38,7 +38,7 @@ bool generateDiagPrecond(unsigned n, unsigned const*const iA, unsigned const*con * @return true, if all row sums are distinct from zero, else false */ bool generateDiagPrecondRowSum(unsigned n, unsigned const*const iA, double const*const A, - double* diag); + double* diag); /** * diagonal preconditioner \f$P_{ii} = \left(\max_{j} a_{ij}\right)^{-1}\f$ associated with \f$n \times n\f$ matrix \f$A\f$ @@ -49,7 +49,7 @@ bool generateDiagPrecondRowSum(unsigned n, unsigned const*const iA, double const * @return true, if all row sums are distinct from zero, else false */ bool generateDiagPrecondRowMax(unsigned n, unsigned const*const iA, double const*const A, - double* diag); + double* diag); } // end namespace MathLib diff --git a/MathLib/LinAlg/RowColumnIndices.h b/MathLib/LinAlg/RowColumnIndices.h index 8baeaad80e2..756b1f83356 100644 --- a/MathLib/LinAlg/RowColumnIndices.h +++ b/MathLib/LinAlg/RowColumnIndices.h @@ -18,13 +18,13 @@ namespace MathLib template <typename IDX_TYPE> struct RowColumnIndices { - typedef typename std::vector<IDX_TYPE> LineIndex; - RowColumnIndices(LineIndex const& rows_, LineIndex const& columns_) - : rows(rows_), columns(columns_) - { } + typedef typename std::vector<IDX_TYPE> LineIndex; + RowColumnIndices(LineIndex const& rows_, LineIndex const& columns_) + : rows(rows_), columns(columns_) + { } - LineIndex const& rows; - LineIndex const& columns; + LineIndex const& rows; + LineIndex const& columns; }; } // MathLib diff --git a/MathLib/LinAlg/Solvers/BiCGStab.cpp b/MathLib/LinAlg/Solvers/BiCGStab.cpp index a29b12e3fbd..a7013bc1d98 100644 --- a/MathLib/LinAlg/Solvers/BiCGStab.cpp +++ b/MathLib/LinAlg/Solvers/BiCGStab.cpp @@ -22,130 +22,130 @@ namespace MathLib { unsigned BiCGStab(CRSMatrix<double, unsigned> const& A, double* const b, double* const x, - double& eps, unsigned& nsteps) + double& eps, unsigned& nsteps) { - const unsigned N(A.getNRows()); - double *v (new double[8* N]); - double *p (v + N); - double *phat (p + N); - double *s (phat + N); - double *shat (s + N); - double *t (shat + N); - double *r (t + N); - double *r0 (r + N); - double resid; - - // normb = |b| - double nrmb = blas::nrm2(N, b); - if (nrmb < D_PREC) nrmb = D_ONE; - - // r = r0 = b - A x0 - blas::copy(N, b, r0); - A.amux(D_MONE, x, r0); - blas::copy(N, r0, r); - - resid = blas::nrm2(N, r) / nrmb; - - if (resid < eps) { - eps = resid; - nsteps = 0; - delete[] v; - return 0; - } - - double alpha = D_ZERO, omega = D_ZERO, rho2 = D_ZERO; - - for (unsigned l = 1; l <= nsteps; ++l) { - // rho1 = r0 * r - const double rho1 = blas::scpr(N, r0, r); - if (fabs(rho1) < D_PREC) { - eps = blas::nrm2(N, r) / nrmb; - delete[] v; - return 2; - } - - if (l == 1) - blas::copy(N, r, p); // p = r - else { -// blas::axpy(N, -omega, v, p); // p = (p-omega v)*beta+r - const double beta = rho1 * alpha / (rho2 * omega); -// blas::scal(N, beta, p); -// blas::axpy(N, D_ONE, r, p); - // p = (p-omega v)*beta+r - for (unsigned k(0); k<N; k++) { - p[k] = (p[k] - omega * v[k]) * beta + r[k]; - } - } - - // p^ = C p - blas::copy(N, p, phat); - A.precondApply(phat); - // v = A p^ - blas::setzero(N, v); - A.amux(D_ONE, phat, v); - - alpha = rho1 / blas::scpr(N, r0, v); - - // s = r - alpha v -// blas::copy(N, r, s); -// blas::axpy(N, -alpha, v, s); - for (unsigned k(0); k<N; k++) { - s[k] = r[k] - alpha * v[k]; - } - - resid = blas::nrm2(N, s) / nrmb; + const unsigned N(A.getNRows()); + double *v (new double[8* N]); + double *p (v + N); + double *phat (p + N); + double *s (phat + N); + double *shat (s + N); + double *t (shat + N); + double *r (t + N); + double *r0 (r + N); + double resid; + + // normb = |b| + double nrmb = blas::nrm2(N, b); + if (nrmb < D_PREC) nrmb = D_ONE; + + // r = r0 = b - A x0 + blas::copy(N, b, r0); + A.amux(D_MONE, x, r0); + blas::copy(N, r0, r); + + resid = blas::nrm2(N, r) / nrmb; + + if (resid < eps) { + eps = resid; + nsteps = 0; + delete[] v; + return 0; + } + + double alpha = D_ZERO, omega = D_ZERO, rho2 = D_ZERO; + + for (unsigned l = 1; l <= nsteps; ++l) { + // rho1 = r0 * r + const double rho1 = blas::scpr(N, r0, r); + if (fabs(rho1) < D_PREC) { + eps = blas::nrm2(N, r) / nrmb; + delete[] v; + return 2; + } + + if (l == 1) + blas::copy(N, r, p); // p = r + else { +// blas::axpy(N, -omega, v, p); // p = (p-omega v)*beta+r + const double beta = rho1 * alpha / (rho2 * omega); +// blas::scal(N, beta, p); +// blas::axpy(N, D_ONE, r, p); + // p = (p-omega v)*beta+r + for (unsigned k(0); k<N; k++) { + p[k] = (p[k] - omega * v[k]) * beta + r[k]; + } + } + + // p^ = C p + blas::copy(N, p, phat); + A.precondApply(phat); + // v = A p^ + blas::setzero(N, v); + A.amux(D_ONE, phat, v); + + alpha = rho1 / blas::scpr(N, r0, v); + + // s = r - alpha v +// blas::copy(N, r, s); +// blas::axpy(N, -alpha, v, s); + for (unsigned k(0); k<N; k++) { + s[k] = r[k] - alpha * v[k]; + } + + resid = blas::nrm2(N, s) / nrmb; #ifndef NDEBUG - std::cout << "Step " << l << ", resid=" << resid << std::endl; + std::cout << "Step " << l << ", resid=" << resid << std::endl; #endif - if (resid < eps) { - // x += alpha p^ - blas::axpy(N, alpha, phat, x); - eps = resid; - nsteps = l; - delete[] v; - return 0; - } - - // s^ = C s - blas::copy(N, s, shat); - A.precondApply(shat); - - // t = A s^ - blas::setzero(N, t); - A.amux(D_ONE, shat, t); - - // omega = t*s / t*t - omega = blas::scpr(N, t, s) / blas::scpr(N, t, t); - - // x += alpha p^ + omega s^ - blas::axpy(N, alpha, phat, x); - blas::axpy(N, omega, shat, x); - - // r = s - omega t - blas::copy(N, s, r); - blas::axpy(N, -omega, t, r); - - rho2 = rho1; - - resid = blas::nrm2(N, r) / nrmb; - - if (resid < eps) { - eps = resid; - nsteps = l; - delete[] v; - return 0; - } - - if (fabs(omega) < D_PREC) { - eps = resid; - delete[] v; - return 3; - } - } - - eps = resid; - delete[] v; - return 1; + if (resid < eps) { + // x += alpha p^ + blas::axpy(N, alpha, phat, x); + eps = resid; + nsteps = l; + delete[] v; + return 0; + } + + // s^ = C s + blas::copy(N, s, shat); + A.precondApply(shat); + + // t = A s^ + blas::setzero(N, t); + A.amux(D_ONE, shat, t); + + // omega = t*s / t*t + omega = blas::scpr(N, t, s) / blas::scpr(N, t, t); + + // x += alpha p^ + omega s^ + blas::axpy(N, alpha, phat, x); + blas::axpy(N, omega, shat, x); + + // r = s - omega t + blas::copy(N, s, r); + blas::axpy(N, -omega, t, r); + + rho2 = rho1; + + resid = blas::nrm2(N, r) / nrmb; + + if (resid < eps) { + eps = resid; + nsteps = l; + delete[] v; + return 0; + } + + if (fabs(omega) < D_PREC) { + eps = resid; + delete[] v; + return 3; + } + } + + eps = resid; + delete[] v; + return 1; } } // end namespace MathLib diff --git a/MathLib/LinAlg/Solvers/CG.cpp b/MathLib/LinAlg/Solvers/CG.cpp index 41249ed5d08..e93f6db443c 100644 --- a/MathLib/LinAlg/Solvers/CG.cpp +++ b/MathLib/LinAlg/Solvers/CG.cpp @@ -36,86 +36,86 @@ namespace MathLib { extern unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b, - double* const x, double& eps, unsigned& nsteps) + double* const x, double& eps, unsigned& nsteps) { - unsigned N = mat->getNRows(); - double *p, *q, *r, *rhat, rho, rho1 = 0.0; - - p = new double[4* N]; - q = p + N; - r = q + N; - rhat = r + N; - - double nrmb = sqrt(scalarProduct(b, b, N)); - if (nrmb < std::numeric_limits<double>::epsilon()) { - blas::setzero(N, x); - eps = 0.0; - nsteps = 0; - delete[] p; - return 0; - } - - // r0 = b - Ax0 - mat->amux(D_MONE, x, r); - for (unsigned k(0); k < N; k++) { - r[k] = b[k] - r[k]; - } - - double resid = blas::nrm2(N, r); - if (resid <= eps * nrmb) { - eps = resid / nrmb; - nsteps = 0; - delete[] p; - return 0; - } - - for (unsigned l = 1; l <= nsteps; ++l) { + unsigned N = mat->getNRows(); + double *p, *q, *r, *rhat, rho, rho1 = 0.0; + + p = new double[4* N]; + q = p + N; + r = q + N; + rhat = r + N; + + double nrmb = sqrt(scalarProduct(b, b, N)); + if (nrmb < std::numeric_limits<double>::epsilon()) { + blas::setzero(N, x); + eps = 0.0; + nsteps = 0; + delete[] p; + return 0; + } + + // r0 = b - Ax0 + mat->amux(D_MONE, x, r); + for (unsigned k(0); k < N; k++) { + r[k] = b[k] - r[k]; + } + + double resid = blas::nrm2(N, r); + if (resid <= eps * nrmb) { + eps = resid / nrmb; + nsteps = 0; + delete[] p; + return 0; + } + + for (unsigned l = 1; l <= nsteps; ++l) { #ifndef NDEBUG - std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl; + std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl; #endif - // r^ = C r - blas::copy(N, r, rhat); - mat->precondApply(rhat); - - // rho = r * r^; - rho = scalarProduct(r, rhat, N); // num_threads); - - if (l > 1) { - double beta = rho / rho1; - // p = r^ + beta * p - unsigned k; - for (k = 0; k < N; k++) { - p[k] = rhat[k] + beta * p[k]; - } - } else blas::copy(N, rhat, p); - - // q = Ap - blas::setzero(N, q); - mat->amux(D_ONE, p, q); - - // alpha = rho / p*q - double alpha = rho / scalarProduct(p, q, N); - - // x += alpha * p - blas::axpy(N, alpha, p, x); - - // r -= alpha * q - blas::axpy(N, -alpha, q, r); - - resid = sqrt(scalarProduct(r, r, N)); - - if (resid <= eps * nrmb) { - eps = resid / nrmb; - nsteps = l; - delete[] p; - return 0; - } - - rho1 = rho; - } - eps = resid / nrmb; - delete[] p; - return 1; + // r^ = C r + blas::copy(N, r, rhat); + mat->precondApply(rhat); + + // rho = r * r^; + rho = scalarProduct(r, rhat, N); // num_threads); + + if (l > 1) { + double beta = rho / rho1; + // p = r^ + beta * p + unsigned k; + for (k = 0; k < N; k++) { + p[k] = rhat[k] + beta * p[k]; + } + } else blas::copy(N, rhat, p); + + // q = Ap + blas::setzero(N, q); + mat->amux(D_ONE, p, q); + + // alpha = rho / p*q + double alpha = rho / scalarProduct(p, q, N); + + // x += alpha * p + blas::axpy(N, alpha, p, x); + + // r -= alpha * q + blas::axpy(N, -alpha, q, r); + + resid = sqrt(scalarProduct(r, r, N)); + + if (resid <= eps * nrmb) { + eps = resid / nrmb; + nsteps = l; + delete[] p; + return 0; + } + + rho1 = rho; + } + eps = resid / nrmb; + delete[] p; + return 1; } } // end namespace MathLib diff --git a/MathLib/LinAlg/Solvers/CG.h b/MathLib/LinAlg/Solvers/CG.h index 37c37aa0cb8..aed3d6eebc3 100644 --- a/MathLib/LinAlg/Solvers/CG.h +++ b/MathLib/LinAlg/Solvers/CG.h @@ -21,11 +21,11 @@ namespace MathLib { template <typename PF_TYPE, typename IDX_TYPE> class CRSMatrix; unsigned CG(CRSMatrix<double,unsigned> const * mat, double const * const b, - double* const x, double& eps, unsigned& nsteps); + double* const x, double& eps, unsigned& nsteps); #ifdef _OPENMP unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const b, - double* const x, double& eps, unsigned& nsteps); + double* const x, double& eps, unsigned& nsteps); #endif } // end namespace MathLib diff --git a/MathLib/LinAlg/Solvers/CGParallel.cpp b/MathLib/LinAlg/Solvers/CGParallel.cpp index 786b61558b4..1ba30c35f3e 100644 --- a/MathLib/LinAlg/Solvers/CGParallel.cpp +++ b/MathLib/LinAlg/Solvers/CGParallel.cpp @@ -40,122 +40,122 @@ namespace MathLib { #ifdef _OPENMP unsigned CGParallel(CRSMatrix<double,unsigned> const * mat, double const * const b, - double* const x, double& eps, unsigned& nsteps) + double* const x, double& eps, unsigned& nsteps) { #ifdef WIN32 #pragma warning ( push ) #pragma warning ( disable: 4018 ) #endif - const unsigned N(mat->getNRows()); - double * __restrict__ p(new double[N]); - double * __restrict__ q(new double[N]); - double * __restrict__ r(new double[N]); - double * __restrict__ rhat(new double[N]); - double rho, rho1 = 0.0; - - double nrmb = sqrt(scalarProduct(b, b, N)); - - if (nrmb < std::numeric_limits<double>::epsilon()) { - blas::setzero(N, x); - eps = 0.0; - nsteps = 0; - delete[] p; - return 0; - } - - // r0 = b - Ax0 - mat->amux(D_MONE, x, r); - for (unsigned k(0); k < N; k++) { - r[k] = b[k] - r[k]; - } - - double resid = blas::nrm2(N, r); - if (resid <= eps * nrmb) { - eps = resid / nrmb; - nsteps = 0; - delete[] p; - delete[] q; - delete[] r; - delete[] rhat; - return 0; - } - - OPENMP_LOOP_TYPE k; - for (unsigned l = 1; l <= nsteps; ++l) { + const unsigned N(mat->getNRows()); + double * __restrict__ p(new double[N]); + double * __restrict__ q(new double[N]); + double * __restrict__ r(new double[N]); + double * __restrict__ rhat(new double[N]); + double rho, rho1 = 0.0; + + double nrmb = sqrt(scalarProduct(b, b, N)); + + if (nrmb < std::numeric_limits<double>::epsilon()) { + blas::setzero(N, x); + eps = 0.0; + nsteps = 0; + delete[] p; + return 0; + } + + // r0 = b - Ax0 + mat->amux(D_MONE, x, r); + for (unsigned k(0); k < N; k++) { + r[k] = b[k] - r[k]; + } + + double resid = blas::nrm2(N, r); + if (resid <= eps * nrmb) { + eps = resid / nrmb; + nsteps = 0; + delete[] p; + delete[] q; + delete[] r; + delete[] rhat; + return 0; + } + + OPENMP_LOOP_TYPE k; + for (unsigned l = 1; l <= nsteps; ++l) { #ifndef NDEBUG - std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl; + std::cout << "Step " << l << ", resid=" << resid / nrmb << std::endl; #endif - // r^ = C r - // rhat = r -// blas::copy(N, r, rhat); + // r^ = C r + // rhat = r +// blas::copy(N, r, rhat); #pragma omp parallel for - for (k = 0; k < N; k++) { - rhat[k] = r[k]; - } - mat->precondApply(rhat); + for (k = 0; k < N; k++) { + rhat[k] = r[k]; + } + mat->precondApply(rhat); - // rho = r * r^; - rho = scalarProduct(r, rhat, N); + // rho = r * r^; + rho = scalarProduct(r, rhat, N); - if (l > 1) { - double beta = rho / rho1; - // p = r^ + beta * p + if (l > 1) { + double beta = rho / rho1; + // p = r^ + beta * p #pragma omp parallel for - for (k = 0; k < N; k++) { - p[k] = rhat[k] + beta * p[k]; - } - } else { -// blas::copy(N, rhat, p); - #pragma omp parallel for - for (k = 0; k < N; k++) { - p[k] = rhat[k]; - } - } - - // q = Ap - mat->amux(D_ONE, p, q); - - // alpha = rho / p*q - double alpha = rho / scalarProduct(p, q, N); - - #pragma omp parallel - { - // x += alpha * p - #pragma omp for nowait - for (k = 0; k < N; k++) { - x[k] += alpha * p[k]; - } - - // r -= alpha * q - #pragma omp for nowait - for (k = 0; k < N; k++) { - r[k] -= alpha * q[k]; - } - - #pragma omp barrier - } // end #pragma omp parallel - - resid = sqrt(scalarProduct(r, r, N)); - - if (resid <= eps * nrmb) { - eps = resid / nrmb; - nsteps = l; - delete[] p; - delete[] q; - delete[] r; - delete[] rhat; - return 0; - } - - rho1 = rho; - } - eps = resid / nrmb; - delete[] p; - delete[] q; - delete[] r; - delete[] rhat; - return 1; + for (k = 0; k < N; k++) { + p[k] = rhat[k] + beta * p[k]; + } + } else { +// blas::copy(N, rhat, p); + #pragma omp parallel for + for (k = 0; k < N; k++) { + p[k] = rhat[k]; + } + } + + // q = Ap + mat->amux(D_ONE, p, q); + + // alpha = rho / p*q + double alpha = rho / scalarProduct(p, q, N); + + #pragma omp parallel + { + // x += alpha * p + #pragma omp for nowait + for (k = 0; k < N; k++) { + x[k] += alpha * p[k]; + } + + // r -= alpha * q + #pragma omp for nowait + for (k = 0; k < N; k++) { + r[k] -= alpha * q[k]; + } + + #pragma omp barrier + } // end #pragma omp parallel + + resid = sqrt(scalarProduct(r, r, N)); + + if (resid <= eps * nrmb) { + eps = resid / nrmb; + nsteps = l; + delete[] p; + delete[] q; + delete[] r; + delete[] rhat; + return 0; + } + + rho1 = rho; + } + eps = resid / nrmb; + delete[] p; + delete[] q; + delete[] r; + delete[] rhat; + return 1; #ifdef WIN32 #pragma warning ( pop ) #endif diff --git a/MathLib/LinAlg/Solvers/GMRes.cpp b/MathLib/LinAlg/Solvers/GMRes.cpp index c4e59484ae1..21208584704 100644 --- a/MathLib/LinAlg/Solvers/GMRes.cpp +++ b/MathLib/LinAlg/Solvers/GMRes.cpp @@ -22,153 +22,153 @@ namespace MathLib { static void genPlRot(double dx, double dy, double& cs, double& sn) { - if (dy <= std::numeric_limits<double>::epsilon()) { - cs = 1.0; - sn = 0.0; - } else if (fabs(dy) > fabs(dx)) { - const double tmp = dx / dy; - sn = 1.0 / sqrt(1.0 + tmp * tmp); - cs = tmp * sn; - } else { - const double tmp = dy / dx; - cs = 1.0 / sqrt(1.0 + tmp * tmp); - sn = tmp * cs; - } + if (dy <= std::numeric_limits<double>::epsilon()) { + cs = 1.0; + sn = 0.0; + } else if (fabs(dy) > fabs(dx)) { + const double tmp = dx / dy; + sn = 1.0 / sqrt(1.0 + tmp * tmp); + cs = tmp * sn; + } else { + const double tmp = dy / dx; + cs = 1.0 / sqrt(1.0 + tmp * tmp); + sn = tmp * cs; + } } inline void applPlRot(double& dx, double& dy, double cs, double sn) { - const double tmp = cs * dx + sn * dy; - dy = cs * dy - sn * dx; - dx = tmp; + const double tmp = cs * dx + sn * dy; + dy = cs * dy - sn * dx; + dx = tmp; } // solve H y = s and update x += MVy static void update(const CRSMatrix<double,unsigned>& A, unsigned k, double* H, - unsigned ldH, double* s, double* V, double* x) + unsigned ldH, double* s, double* V, double* x) { - const unsigned n(static_cast<unsigned>(A.getNRows())); - double *y = new double[k]; - double *xh = new double[n]; - blas::copy(k, s, y); - int inf; - - dtrtrs_(JOB_STR + 5, JOB_STR, JOB_STR, &k, &N_ONE, H, &ldH, y, &k, &inf); - assert(inf == 0); - - // x += M V y - blas::setzero(n, xh); - blas::gemva(n, k, D_ONE, V, y, xh); - A.precondApply(xh); - blas::add(n, xh, x); - - delete[] xh; - delete[] y; + const unsigned n(static_cast<unsigned>(A.getNRows())); + double *y = new double[k]; + double *xh = new double[n]; + blas::copy(k, s, y); + int inf; + + dtrtrs_(JOB_STR + 5, JOB_STR, JOB_STR, &k, &N_ONE, H, &ldH, y, &k, &inf); + assert(inf == 0); + + // x += M V y + blas::setzero(n, xh); + blas::gemva(n, k, D_ONE, V, y, xh); + A.precondApply(xh); + blas::add(n, xh, x); + + delete[] xh; + delete[] y; } unsigned GMRes(const CRSMatrix<double,unsigned>& A, double* const b, double* const x, - double& eps, unsigned m, unsigned& nsteps) + double& eps, unsigned m, unsigned& nsteps) { - double resid; - unsigned j = 1; - - const unsigned n (static_cast<unsigned>(A.getNRows())); - - double *r = new double[2*n + (n + m + 4) * (m + 1)]; // n - double *V = r + n; // n x (m+1) - double *H = V + n * (m + 1); // m+1 x m - double *cs = H + (m + 1) * m; // m+1 - double *sn = cs + m + 1; // m+1 - double *s = sn + m + 1; // m+1 - double *xh = s + m + 1; // m+1 - - // normb = norm(b) - double normb = blas::nrm2(n, b); - if (normb == 0.0) { - blas::setzero(n, x); - eps = 0.0; - nsteps = 0; - delete[] r; - return 0; - } - - // r = b - Ax - blas::copy(n, b, r); - A.amux(D_MONE, x, r); - - double beta = blas::nrm2(n, r); - - if ((resid = beta / normb) <= eps) { - eps = resid; - nsteps = 0; - delete[] r; - return 0; - } - - while (j <= nsteps) { - blas::copy(n, r, V); // v0 first orthonormal vector - blas::scal(n, 1.0 / beta, V); - - s[0] = beta; - blas::setzero(m, s + 1); - - for (unsigned i = 0; i < m && j <= nsteps; i++, j++) { - - // w = A M * v[i]; - blas::copy(n, V + i * n, xh); - A.precondApply(xh); - blas::setzero(n, V + (i + 1) * n); - A.amux(D_ONE, xh, V + (i + 1) * n); - - for (unsigned k = 0; k <= i; k++) { - H[k + i * (m + 1)] = blas::scpr(n, V + (i + 1) * n, V + k * n); - blas::axpy(n, -H[k + i * (m + 1)], V + k * n, V + (i + 1) * n); - } - - H[i * (m + 2) + 1] = blas::nrm2(n, V + (i + 1) * n); - blas::scal(n, 1.0 / H[i * (m + 2) + 1], V + (i + 1) * n); - - // apply old Givens rotations to the last column in H - for (unsigned k = 0; k < i; k++) - applPlRot(H[k + i * (m + 1)], H[k + 1 + i * (m + 1)], cs[k], - sn[k]); - - // generate new Givens rotation which eleminates H[i*(m+2)+1] - genPlRot(H[i * (m + 2)], H[i * (m + 2) + 1], cs[i], sn[i]); - // apply it to H and s - applPlRot(H[i * (m + 2)], H[i * (m + 2) + 1], cs[i], sn[i]); - applPlRot(s[i], s[i + 1], cs[i], sn[i]); - - if ((resid = fabs(s[i + 1] / normb)) < eps) { - update(A, i + 1, H, m + 1, s, V, x); - eps = resid; - nsteps = j; - delete[] r; - return 0; - } + double resid; + unsigned j = 1; + + const unsigned n (static_cast<unsigned>(A.getNRows())); + + double *r = new double[2*n + (n + m + 4) * (m + 1)]; // n + double *V = r + n; // n x (m+1) + double *H = V + n * (m + 1); // m+1 x m + double *cs = H + (m + 1) * m; // m+1 + double *sn = cs + m + 1; // m+1 + double *s = sn + m + 1; // m+1 + double *xh = s + m + 1; // m+1 + + // normb = norm(b) + double normb = blas::nrm2(n, b); + if (normb == 0.0) { + blas::setzero(n, x); + eps = 0.0; + nsteps = 0; + delete[] r; + return 0; + } + + // r = b - Ax + blas::copy(n, b, r); + A.amux(D_MONE, x, r); + + double beta = blas::nrm2(n, r); + + if ((resid = beta / normb) <= eps) { + eps = resid; + nsteps = 0; + delete[] r; + return 0; + } + + while (j <= nsteps) { + blas::copy(n, r, V); // v0 first orthonormal vector + blas::scal(n, 1.0 / beta, V); + + s[0] = beta; + blas::setzero(m, s + 1); + + for (unsigned i = 0; i < m && j <= nsteps; i++, j++) { + + // w = A M * v[i]; + blas::copy(n, V + i * n, xh); + A.precondApply(xh); + blas::setzero(n, V + (i + 1) * n); + A.amux(D_ONE, xh, V + (i + 1) * n); + + for (unsigned k = 0; k <= i; k++) { + H[k + i * (m + 1)] = blas::scpr(n, V + (i + 1) * n, V + k * n); + blas::axpy(n, -H[k + i * (m + 1)], V + k * n, V + (i + 1) * n); + } + + H[i * (m + 2) + 1] = blas::nrm2(n, V + (i + 1) * n); + blas::scal(n, 1.0 / H[i * (m + 2) + 1], V + (i + 1) * n); + + // apply old Givens rotations to the last column in H + for (unsigned k = 0; k < i; k++) + applPlRot(H[k + i * (m + 1)], H[k + 1 + i * (m + 1)], cs[k], + sn[k]); + + // generate new Givens rotation which eleminates H[i*(m+2)+1] + genPlRot(H[i * (m + 2)], H[i * (m + 2) + 1], cs[i], sn[i]); + // apply it to H and s + applPlRot(H[i * (m + 2)], H[i * (m + 2) + 1], cs[i], sn[i]); + applPlRot(s[i], s[i + 1], cs[i], sn[i]); + + if ((resid = fabs(s[i + 1] / normb)) < eps) { + update(A, i + 1, H, m + 1, s, V, x); + eps = resid; + nsteps = j; + delete[] r; + return 0; + } #ifndef NDEBUG - std::cout << "Step " << j << ", resid=" << resid << std::endl; + std::cout << "Step " << j << ", resid=" << resid << std::endl; #endif - } - - update(A, m, H, m + 1, s, V, x); - - // r = b - A x; - blas::copy(n, b, r); - A.amux(D_MONE, x, r); - beta = blas::nrm2(n, r); - - if ((resid = beta / normb) < eps) { - eps = resid; - nsteps = j; - delete[] r; - return 0; - } - } - - eps = resid; - delete[] r; - return 1; + } + + update(A, m, H, m + 1, s, V, x); + + // r = b - A x; + blas::copy(n, b, r); + A.amux(D_MONE, x, r); + beta = blas::nrm2(n, r); + + if ((resid = beta / normb) < eps) { + eps = resid; + nsteps = j; + delete[] r; + return 0; + } + } + + eps = resid; + delete[] r; + return 1; } } // end namespace MathLib diff --git a/MathLib/LinAlg/Solvers/GaussAlgorithm-impl.h b/MathLib/LinAlg/Solvers/GaussAlgorithm-impl.h index 2d7bab3545f..b6c94348af4 100644 --- a/MathLib/LinAlg/Solvers/GaussAlgorithm-impl.h +++ b/MathLib/LinAlg/Solvers/GaussAlgorithm-impl.h @@ -22,36 +22,36 @@ namespace MathLib template <typename MAT_T, typename VEC_T> void GaussAlgorithm<MAT_T, VEC_T>::performLU(MAT_T& A) { - IDX_T const nr(A.getNRows()); - IDX_T const nc(A.getNCols()); + IDX_T const nr(A.getNRows()); + IDX_T const nc(A.getNCols()); - for (IDX_T k=0; k<nc; k++) { - // search pivot - FP_T t = std::abs(A(k, k)); - _perm[k] = k; - for (IDX_T i=k+1; i<nr; i++) { - FP_T const s = std::abs(A(i,k)); - if (s > t) { - t = s; - _perm[k] = i; - } - } + for (IDX_T k=0; k<nc; k++) { + // search pivot + FP_T t = std::abs(A(k, k)); + _perm[k] = k; + for (IDX_T i=k+1; i<nr; i++) { + FP_T const s = std::abs(A(i,k)); + if (s > t) { + t = s; + _perm[k] = i; + } + } - // exchange rows - if (_perm[k] != k) { - for (IDX_T j=0; j<nc; j++) - std::swap (A(_perm[k],j), A(k,j)); - } + // exchange rows + if (_perm[k] != k) { + for (IDX_T j=0; j<nc; j++) + std::swap (A(_perm[k],j), A(k,j)); + } - // eliminate - for (IDX_T i=k+1; i<nr; i++) { - FP_T const l = A(i,k)/A(k,k); - for (IDX_T j=k; j<nc; j++) { - A(i,j) -= A(k,j) * l; - } - A(i,k) = l; - } - } + // eliminate + for (IDX_T i=k+1; i<nr; i++) { + FP_T const l = A(i,k)/A(k,k); + for (IDX_T j=k; j<nc; j++) { + A(i,j) -= A(k,j) * l; + } + A(i,k) = l; + } + } } template <typename MAT_T, typename VEC_T> @@ -59,55 +59,55 @@ template <typename V> void GaussAlgorithm<MAT_T, VEC_T>:: solve (MAT_T& A, V& b, bool decompose) { - _perm.resize(A.getNRows()); + _perm.resize(A.getNRows()); - if (decompose) - performLU(A); - permuteRHS (b); - forwardSolve (A, b); // L z = b, b will be overwritten by z - backwardSolve (A, b); // U x = z, b (z) will be overwritten by x + if (decompose) + performLU(A); + permuteRHS (b); + forwardSolve (A, b); // L z = b, b will be overwritten by z + backwardSolve (A, b); // U x = z, b (z) will be overwritten by x } template <typename MAT_T, typename VEC_T> void GaussAlgorithm<MAT_T, VEC_T>:: solve (MAT_T& A, FP_T* & b, bool decompose) { - _perm.resize(A.getNRows()); + _perm.resize(A.getNRows()); - if (decompose) - performLU(A); - permuteRHS (b); - forwardSolve (A, b); // L z = b, b will be overwritten by z - backwardSolve (A, b); // U x = z, b (z) will be overwritten by x + if (decompose) + performLU(A); + permuteRHS (b); + forwardSolve (A, b); // L z = b, b will be overwritten by z + backwardSolve (A, b); // U x = z, b (z) will be overwritten by x } template <typename MAT_T, typename VEC_T> void GaussAlgorithm<MAT_T, VEC_T>::solve ( - MAT_T& A, VEC_T const& b, VEC_T & x, - bool decompose) + MAT_T& A, VEC_T const& b, VEC_T & x, + bool decompose) { - for (std::size_t k(0); k<A.getNRows(); k++) - x[k] = b[k]; - solve(A, x, decompose); + for (std::size_t k(0); k<A.getNRows(); k++) + x[k] = b[k]; + solve(A, x, decompose); } template <typename MAT_T, typename VEC_T> template <typename V> void GaussAlgorithm<MAT_T, VEC_T>::permuteRHS (V & b) const { - for (IDX_T i=0; i<_perm.size(); i++) { - if (_perm[i] != i) - std::swap(b[i], b[_perm[i]]); - } + for (IDX_T i=0; i<_perm.size(); i++) { + if (_perm[i] != i) + std::swap(b[i], b[_perm[i]]); + } } template <typename MAT_T, typename VEC_T> void GaussAlgorithm<MAT_T, VEC_T>::permuteRHS (VEC_T& b) const { - for (IDX_T i=0; i<_perm.size(); i++) { - if (_perm[i] != i) - std::swap(b[i], b[_perm[i]]); - } + for (IDX_T i=0; i<_perm.size(); i++) { + if (_perm[i] != i) + std::swap(b[i], b[_perm[i]]); + } } } // end namespace MathLib diff --git a/MathLib/LinAlg/Solvers/GaussAlgorithm.h b/MathLib/LinAlg/Solvers/GaussAlgorithm.h index 618a468c05a..5c2555a4832 100644 --- a/MathLib/LinAlg/Solvers/GaussAlgorithm.h +++ b/MathLib/LinAlg/Solvers/GaussAlgorithm.h @@ -36,65 +36,65 @@ template <typename MAT_T, typename VEC_T = typename MAT_T::FP_T*> class GaussAlgorithm { public: - typedef typename MAT_T::FP_T FP_T; - typedef typename MAT_T::IDX_T IDX_T; + typedef typename MAT_T::FP_T FP_T; + typedef typename MAT_T::IDX_T IDX_T; public: - /** - * A direct solver for the (dense) linear system \f$A x = b\f$. - * @param solver_name A name used as a prefix for command line options - * if there are such options available. - * @param option For some solvers the user can give parameters to the - * algorithm. GaussAlgorithm has to fulfill the common interface - * of all solvers of systems of linear equations. For this reason the - * second argument was introduced. - */ - GaussAlgorithm(const std::string solver_name = "", - BaseLib::ConfigTree const*const option = nullptr) - { - (void) solver_name; (void) option; // silence both compiler and doxygen warnings. - } - - /** - * Method solves the linear system \f$A x = b\f$ (based on the LU factorization) - * using forward solve and backward solve. - * @param A the coefficient matrix - * @param b at the beginning the right hand side, at the end the solution - * @param decompose Flag that signals if the LU decomposition should be - * performed or not. If the matrix \f$A\f$ does not change, the LU - * decomposition needs to be performed once only! - * @attention The entries of the given matrix will be changed! - */ - template <typename V> - void solve (MAT_T& A, V & b, bool decompose = true); - - void solve(MAT_T& A, FP_T* & b, bool decompose = true); - - /** - * Method solves the linear system \f$A x = b\f$ (based on the LU factorization) - * using forward solve and backward solve. - * @param A (input) the coefficient matrix - * @param b (input) the right hand side - * @param x (output) the solution - * @param decompose see documentation of the other solve methods. - * @attention The entries of the given matrix will be changed! - */ - void solve(MAT_T& A, VEC_T const& b, VEC_T & x, bool decompose = true); + /** + * A direct solver for the (dense) linear system \f$A x = b\f$. + * @param solver_name A name used as a prefix for command line options + * if there are such options available. + * @param option For some solvers the user can give parameters to the + * algorithm. GaussAlgorithm has to fulfill the common interface + * of all solvers of systems of linear equations. For this reason the + * second argument was introduced. + */ + GaussAlgorithm(const std::string solver_name = "", + BaseLib::ConfigTree const*const option = nullptr) + { + (void) solver_name; (void) option; // silence both compiler and doxygen warnings. + } + + /** + * Method solves the linear system \f$A x = b\f$ (based on the LU factorization) + * using forward solve and backward solve. + * @param A the coefficient matrix + * @param b at the beginning the right hand side, at the end the solution + * @param decompose Flag that signals if the LU decomposition should be + * performed or not. If the matrix \f$A\f$ does not change, the LU + * decomposition needs to be performed once only! + * @attention The entries of the given matrix will be changed! + */ + template <typename V> + void solve (MAT_T& A, V & b, bool decompose = true); + + void solve(MAT_T& A, FP_T* & b, bool decompose = true); + + /** + * Method solves the linear system \f$A x = b\f$ (based on the LU factorization) + * using forward solve and backward solve. + * @param A (input) the coefficient matrix + * @param b (input) the right hand side + * @param x (output) the solution + * @param decompose see documentation of the other solve methods. + * @attention The entries of the given matrix will be changed! + */ + void solve(MAT_T& A, VEC_T const& b, VEC_T & x, bool decompose = true); private: - // void solve (MAT_T& A, VEC_T const& b, bool decompose); - - void performLU(MAT_T& A); - /** - * permute the right hand side vector according to the - * row permutations of the LU factorization - * @param b the entries of the vector b are permuted - */ - template <typename V> void permuteRHS(V & b) const; - void permuteRHS (VEC_T& b) const; - - //! the permutation of the rows - std::vector<IDX_T> _perm; + // void solve (MAT_T& A, VEC_T const& b, bool decompose); + + void performLU(MAT_T& A); + /** + * permute the right hand side vector according to the + * row permutations of the LU factorization + * @param b the entries of the vector b are permuted + */ + template <typename V> void permuteRHS(V & b) const; + void permuteRHS (VEC_T& b) const; + + //! the permutation of the rows + std::vector<IDX_T> _perm; }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Solvers/IterativeLinearSolver.h b/MathLib/LinAlg/Solvers/IterativeLinearSolver.h index 67aa4ae3110..e3de3943c6c 100644 --- a/MathLib/LinAlg/Solvers/IterativeLinearSolver.h +++ b/MathLib/LinAlg/Solvers/IterativeLinearSolver.h @@ -21,8 +21,8 @@ namespace MathLib { class IterativeLinearSolver: public MathLib::LinearSolver { public: - IterativeLinearSolver() {}; - virtual ~IterativeLinearSolver() {}; + IterativeLinearSolver() {}; + virtual ~IterativeLinearSolver() {}; }; } diff --git a/MathLib/LinAlg/Solvers/TriangularSolve-impl.h b/MathLib/LinAlg/Solvers/TriangularSolve-impl.h index 56e48e1c241..4ec9bbfbea3 100644 --- a/MathLib/LinAlg/Solvers/TriangularSolve-impl.h +++ b/MathLib/LinAlg/Solvers/TriangularSolve-impl.h @@ -17,47 +17,47 @@ namespace MathLib { template <typename FP_T, typename VEC_T> void forwardSolve (const DenseMatrix <FP_T> &L, VEC_T& b) { - typedef typename DenseMatrix<FP_T>::IDX_T IDX_T; - IDX_T m (L.getNRows()); - FP_T t; + typedef typename DenseMatrix<FP_T>::IDX_T IDX_T; + IDX_T m (L.getNRows()); + FP_T t; - for (IDX_T r=0; r<m; r++) { - t = 0.0; - for (IDX_T c=0; c<r; c++) { - t += L(r,c)*b[c]; - } - b[r] = b[r]-t; - } + for (IDX_T r=0; r<m; r++) { + t = 0.0; + for (IDX_T c=0; c<r; c++) { + t += L(r,c)*b[c]; + } + b[r] = b[r]-t; + } } template <typename FP_T, typename VEC_T> void backwardSolve (const DenseMatrix <FP_T> &mat, VEC_T& b) { - FP_T t; - typedef typename DenseMatrix<FP_T>::IDX_T IDX_T; - IDX_T m (mat.getNRows()), n(mat.getNCols()); - for (int r=m-1; r>=0; r--) { - t = 0.0; - for (IDX_T c=r+1; c<n; c++) { - t += mat(r,c)*b[c]; - } - b[r] = (b[r]-t) / mat(r,r); - } + FP_T t; + typedef typename DenseMatrix<FP_T>::IDX_T IDX_T; + IDX_T m (mat.getNRows()), n(mat.getNCols()); + for (int r=m-1; r>=0; r--) { + t = 0.0; + for (IDX_T c=r+1; c<n; c++) { + t += mat(r,c)*b[c]; + } + b[r] = (b[r]-t) / mat(r,r); + } } template <typename FP_T, typename VEC_T> void backwardSolve ( DenseMatrix<FP_T> const& mat, VEC_T& x, VEC_T const& b) { - typedef typename DenseMatrix<FP_T>::IDX_T IDX_T; - IDX_T n_cols (mat.getNCols()); - for (int r = (n_cols - 1); r >= 0; r--) { - FP_T t = 0.0; + typedef typename DenseMatrix<FP_T>::IDX_T IDX_T; + IDX_T n_cols (mat.getNCols()); + for (int r = (n_cols - 1); r >= 0; r--) { + FP_T t = 0.0; - for (IDX_T c = r+1; c < n_cols; c++) { - t += mat(r,c) * b[c]; - } - x[r] = (b[r] - t) / mat(r, r); - } + for (IDX_T c = r+1; c < n_cols; c++) { + t += mat(r,c) * b[c]; + } + x[r] = (b[r] - t) / mat(r, r); + } } diff --git a/MathLib/LinAlg/Solvers/blas.h b/MathLib/LinAlg/Solvers/blas.h index 1e1503e10ae..77298c12192 100644 --- a/MathLib/LinAlg/Solvers/blas.h +++ b/MathLib/LinAlg/Solvers/blas.h @@ -114,7 +114,7 @@ extern "C" unsigned *, unsigned *, const double *, double *, unsigned *, double *, unsigned *); void dswap_(const unsigned*, double*, const unsigned*, double*, - const unsigned*); + const unsigned*); /******************************************************************/ //single precision real @@ -185,36 +185,36 @@ extern "C" void stpmv_(const char*, const char*, const char*, const unsigned*, const float*, float*, const unsigned*); void sswap_(const unsigned*, float*, const unsigned*, float*, - const unsigned*); + const unsigned*); } namespace blas { inline void swap(const unsigned n, double* x, const unsigned incx, - double* y, const unsigned incy ) + double* y, const unsigned incy ) { dswap_(&n, x, &incx, y, &incy); } inline void swap(const unsigned n, float* x, const unsigned incx, - float* y, const unsigned incy ) + float* y, const unsigned incy ) { sswap_(&n, x, &incx, y, &incy); } inline void laset(const unsigned m, const unsigned n, const double a, - const double b, double* A, unsigned ldA) + const double b, double* A, unsigned ldA) { dlaset_(JOB_STR, &m, &n, &a, &b, A, &ldA); } inline void lasetu(const unsigned m, const unsigned n, const double a, - const double b, double* A, unsigned ldA) + const double b, double* A, unsigned ldA) { dlaset_(JOB_STR+5, &m, &n, &a, &b, A, &ldA); } inline void lasetl(const unsigned m, const unsigned n, const double a, - const double b, double* A, unsigned ldA) + const double b, double* A, unsigned ldA) { dlaset_(JOB_STR+6, &m, &n, &a, &b, A, &ldA); } @@ -268,24 +268,24 @@ namespace blas } inline void lacpy(const unsigned m, const unsigned n, double* A, - const unsigned ldA, double* B, const unsigned ldB) + const unsigned ldA, double* B, const unsigned ldB) { dlacpy_(JOB_STR, &m, &n, A, &ldA, B, &ldB); } inline void lacpyu(const unsigned m, const unsigned n, double* A, - const unsigned ldA, double* B, const unsigned ldB) + const unsigned ldA, double* B, const unsigned ldB) { dlacpy_(JOB_STR+5, &m, &n, A, &ldA, B, &ldB); } inline void copy(const unsigned n, double* orig, const unsigned inco, - double* dest, const unsigned incd) + double* dest, const unsigned incd) { dcopy_(&n, orig, &inco, dest, &incd); } inline void copy(const unsigned n, float* orig, const unsigned inco, - float* dest, const unsigned incd) + float* dest, const unsigned incd) { scopy_(&n, orig, &inco, dest, &incd); } @@ -304,12 +304,12 @@ namespace blas // Scalar product conj(x)*y inline double scpr(const unsigned n, const double* const v1, - const double* const v2) + const double* const v2) { return ddot_(&n, v1, &N_ONE, v2, &N_ONE); } inline float scpr(const unsigned n, const float* const v1, - const float* const v2) + const float* const v2) { return sdot_(&n, v1, &N_ONE, v2, &N_ONE); } @@ -334,12 +334,12 @@ namespace blas } inline void axpy(const unsigned n, const double d, const double* const x, - double* const y) + double* const y) { daxpy_(&n, &d, x, &N_ONE, y, &N_ONE); } inline void axpy(const unsigned n, const float d, const float* const x, - float* const y) + float* const y) { saxpy_(&n, &d, x, &N_ONE, y, &N_ONE); } @@ -376,319 +376,319 @@ namespace blas // y = d Ax inline void gemv(const unsigned m, const unsigned n, double d, - const double* A, double *x, double *y) + const double* A, double *x, double *y) { dgemv_(JOB_STR, &m, &n, &d, A, &m, x, &N_ONE, &D_ZERO, y, &N_ONE); } inline void gemv(const unsigned m, const unsigned n, float d, const float* A, - float *x, float *y) + float *x, float *y) { sgemv_(JOB_STR, &m, &n, &d, A, &m, x, &N_ONE, &S_ZERO, y, &N_ONE); } // y += d Ax inline void gemva(const unsigned m, const unsigned n, double d, const double* A, - const double *x, double *y) + const double *x, double *y) { dgemv_(JOB_STR, &m, &n, &d, A, &m, x, &N_ONE, &D_ONE, y, &N_ONE); } inline void gemva(const unsigned m, const unsigned n, float d, const float* A, - const float *x, float *y) + const float *x, float *y) { sgemv_(JOB_STR, &m, &n, &d, A, &m, x, &N_ONE, &S_ONE, y, &N_ONE); } // y = d A^H x inline void gemhv(const unsigned m, const unsigned n, double d, const double* A, - const double *x, double *y) + const double *x, double *y) { dgemv_(JOB_STR+1, &m, &n, &d, A, &m, x, &N_ONE, &D_ZERO, y, &N_ONE); } inline void gemhv(const unsigned m, const unsigned n, float d, const float* A, - const float *x, float *y) + const float *x, float *y) { sgemv_(JOB_STR+1, &m, &n, &d, A, &m, x, &N_ONE, &S_ZERO, y, &N_ONE); } // y += d A^H x inline void gemhva(const unsigned m, const unsigned n, double d, - const double* A, unsigned ldA, const double *x, unsigned incx, - double *y, unsigned incy) + const double* A, unsigned ldA, const double *x, unsigned incx, + double *y, unsigned incy) { dgemv_(JOB_STR+1, &m, &n, &d, A, &ldA, x, &incx, &D_ONE, y, &incy); } inline void gemhva(const unsigned m, const unsigned n, float d, - const float* A, unsigned ldA, const float *x, unsigned incx, - float *y, unsigned incy) + const float* A, unsigned ldA, const float *x, unsigned incx, + float *y, unsigned incy) { sgemv_(JOB_STR+1, &m, &n, &d, A, &ldA, x, &incx, &S_ONE, y, &incy); } inline void gemhva(const unsigned m, const unsigned n, double d, const double* A, - const double *x, double *y) + const double *x, double *y) { gemhva(m, n, d, A, m, x, N_ONE, y, N_ONE); } inline void gemhva(const unsigned m, const unsigned n, float d, const float* A, - const float *x, float *y) + const float *x, float *y) { gemhva(m, n, d, A, m, x, N_ONE, y, N_ONE); } // y += d A x (A symm. dense in packed format) inline void gemsva(const unsigned n, double d, double* A, - double *x, double *y) + double *x, double *y) { dspmv_(JOB_STR+5, &n, &d, A, x, &N_ONE, &D_ONE, y, &N_ONE); } inline void gemsva(const unsigned n, float d, float* A, - float *x, float *y) + float *x, float *y) { sspmv_(JOB_STR+5, &n, &d, A, x, &N_ONE, &S_ONE, y, &N_ONE); } // sovles Ax=B, A is a triangluar Matrix inline void gtsv(const unsigned* n, const double* DiagLower, - const double* Diag, const double* DiagUpper, - const double* B, const int* INFO) + const double* Diag, const double* DiagUpper, + const double* B, const int* INFO) { dgtsv_(n, &N_ONE, DiagLower, Diag, DiagUpper, B, n, INFO); } inline void gtsv(const unsigned* n, const float* DiagLower, - const float* Diag, const float* DiagUpper, - const float* B, const int* INFO) + const float* Diag, const float* DiagUpper, + const float* B, const int* INFO) { sgtsv_(n, &N_ONE, DiagLower, Diag, DiagUpper, B, n, INFO); } // C = d A B, A is m x p, B is p x n inline void gemm(const unsigned m, const unsigned p, const unsigned n, - const double d, const double* const A, const unsigned ldA, - const double* const B, const unsigned ldB, - double* C, const unsigned ldC) + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) { dgemm_(JOB_STR, JOB_STR, &m, &n, &p, &d, A, &ldA, B, &ldB, - &D_ZERO, C, &ldC); + &D_ZERO, C, &ldC); } inline void gemm(const unsigned m, const unsigned p, const unsigned n, - const float d, const float* const A, const unsigned ldA, - const float* const B, const unsigned ldB, - float* C, const unsigned ldC) + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) { sgemm_(JOB_STR, JOB_STR, &m, &n, &p, &d, A, &ldA, B, &ldB, - &S_ZERO, C, &ldC); + &S_ZERO, C, &ldC); } // C += d A B, A is m x p, B is p x n inline void gemma(const unsigned m, const unsigned p, const unsigned n, - const double d, const double* const A, const unsigned ldA, - const double* const B, const unsigned ldB, - double* C, const unsigned ldC) + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) { dgemm_(JOB_STR, JOB_STR, &m, &n, &p, &d, A, &ldA, B, &ldB, - &D_ONE, C, &ldC); + &D_ONE, C, &ldC); } inline void gemma(const unsigned m, const unsigned p, const unsigned n, - const float d, const float* const A, const unsigned ldA, - const float* const B, const unsigned ldB, - float* C, const unsigned ldC) + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) { sgemm_(JOB_STR, JOB_STR, &m, &n, &p, &d, A, &ldA, B, &ldB, - &S_ONE, C, &ldC); + &S_ONE, C, &ldC); } // C = d A^H B, A is m x p, B is m x n inline void gemhm(const unsigned m, const unsigned p, const unsigned n, - const double d, const double* A, const unsigned ldA, - const double *B, const unsigned ldB, - double* C, const unsigned ldC) + const double d, const double* A, const unsigned ldA, + const double *B, const unsigned ldB, + double* C, const unsigned ldC) { dgemm_(JOB_STR+1, JOB_STR, &p, &n, &m, &d, A, &ldA, B, &ldB, - &D_ZERO, C, &ldC); + &D_ZERO, C, &ldC); } inline void gemhm(const unsigned m, const unsigned p, const unsigned n, - const float d, const float* const A, const unsigned ldA, - const float* const B, const unsigned ldB, - float* C, const unsigned ldC) + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) { sgemm_(JOB_STR+1, JOB_STR, &p, &n, &m, &d, A, &ldA, B, &ldB, - &S_ZERO, C, &ldC); + &S_ZERO, C, &ldC); } // C += d A^H B, A is m x p, B is m x n inline void gemhma(unsigned m, unsigned p, unsigned n, double d, - const double* const A, const unsigned ldA, const double* const B, - const unsigned ldB, double* C, unsigned ldC) + const double* const A, const unsigned ldA, const double* const B, + const unsigned ldB, double* C, unsigned ldC) { dgemm_(JOB_STR+1, JOB_STR, &p, &n, &m, &d, A, &ldA, B, &ldB, - &D_ONE, C, &ldC); + &D_ONE, C, &ldC); } inline void gemhma(unsigned m, unsigned p, unsigned n, float d, - const float* const A, const unsigned ldA, const float* const B, - const unsigned ldB, float* C, unsigned ldC) + const float* const A, const unsigned ldA, const float* const B, + const unsigned ldB, float* C, unsigned ldC) { sgemm_(JOB_STR+1, JOB_STR, &p, &n, &m, &d, A, &ldA, B, &ldB, - &S_ONE, C, &ldC); + &S_ONE, C, &ldC); } // C = d A B^H, A is m x p, B is n x p inline void gemmh(const unsigned m, const unsigned p, const unsigned n, - const double d, const double* const A, const unsigned ldA, - const double* const B, const unsigned ldB, - double* C, const unsigned ldC) + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) { dgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, - &D_ZERO, C, &ldC); + &D_ZERO, C, &ldC); } inline void gemmh(const unsigned m, const unsigned p, const unsigned n, - const float d, const float* const A, const unsigned ldA, - const float* const B, const unsigned ldB, - float* C, const unsigned ldC) + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) { sgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, - &S_ZERO, C, &ldC); + &S_ZERO, C, &ldC); } // C += d A B^H, A is m x p, B is n x p inline void gemmha(const unsigned m, const unsigned p, const unsigned n, - const double d, const double* const A, const unsigned ldA, - const double* const B, const unsigned ldB, - double* C, const unsigned ldC) + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) { dgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, - &D_ONE, C, &ldC); + &D_ONE, C, &ldC); } inline void gemmha(const unsigned m, const unsigned p, const unsigned n, - const float d, const float* const A, const unsigned ldA, - const float* const B, const unsigned ldB, - float* C, const unsigned ldC) + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) { sgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, - &S_ONE, C, &ldC); + &S_ONE, C, &ldC); } inline void gemmha(const unsigned m, const unsigned p, const unsigned n, - const double* const A, const unsigned ldA, - const double* const B, const unsigned ldB, - double* C, const unsigned ldC) + const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) { dgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &D_ONE, A, &ldA, B, &ldB, - &D_ONE, C, &ldC); + &D_ONE, C, &ldC); } inline void gemmha(const unsigned m, const unsigned p, const unsigned n, - const float* const A, const unsigned ldA, - const float* const B, const unsigned ldB, - float* C, const unsigned ldC) + const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) { sgemm_(JOB_STR, JOB_STR+1, &m, &n, &p, &S_ONE, A, &ldA, B, &ldB, - &S_ONE, C, &ldC); + &S_ONE, C, &ldC); } // C = d A^H B^H, A is p x m, B is n x p inline void gemhmh(const unsigned m, const unsigned p, const unsigned n, - const double d, const double* const A, const unsigned ldA, - const double* const B, const unsigned ldB, - double* C, const unsigned ldC) + const double d, const double* const A, const unsigned ldA, + const double* const B, const unsigned ldB, + double* C, const unsigned ldC) { dgemm_(JOB_STR+1, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, - &D_ZERO, C, &ldC); + &D_ZERO, C, &ldC); } inline void gemhmh(const unsigned m, const unsigned p, const unsigned n, - const float d, const float* const A, const unsigned ldA, - const float* const B, const unsigned ldB, - float* C, const unsigned ldC) + const float d, const float* const A, const unsigned ldA, + const float* const B, const unsigned ldB, + float* C, const unsigned ldC) { sgemm_(JOB_STR+1, JOB_STR+1, &m, &n, &p, &d, A, &ldA, B, &ldB, - &S_ZERO, C, &ldC); + &S_ZERO, C, &ldC); } //C += d*AB, A is mxm (packed upper half is stored), B is mxn and regular matrix inline void sygemma(const unsigned m, const unsigned n, - const double* const A, const double* const B, - const double d, double* const C) + const double* const A, const double* const B, + const double d, double* const C) { for(unsigned i=0;i<m;i++){ for(unsigned j=0;j<n;j++){ - for(unsigned k=i;k<m;k++){ - if(i==k){ - C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; - }else{ - C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; - C[j*m+k] += d*A[i+k*(k+1)/2]*B[i+j*m]; - } - } + for(unsigned k=i;k<m;k++){ + if(i==k){ + C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; + }else{ + C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; + C[j*m+k] += d*A[i+k*(k+1)/2]*B[i+j*m]; + } + } } } } inline void sygemma(const unsigned m, const unsigned n, - const float* const A, const float* const B, - const float d, float* const C) + const float* const A, const float* const B, + const float d, float* const C) { for(unsigned i=0;i<m;i++){ for(unsigned j=0;j<n;j++){ - for(unsigned k=i;k<m;k++){ - if(i==k){ - C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; - }else{ - C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; - C[j*m+k] += d*A[i+k*(k+1)/2]*B[i+j*m]; - } - } + for(unsigned k=i;k<m;k++){ + if(i==k){ + C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; + }else{ + C[j*m+i] += d*A[i+k*(k+1)/2]*B[k+j*m]; + C[j*m+k] += d*A[i+k*(k+1)/2]*B[i+j*m]; + } + } } } } //C += d*AB, A is mxn and regular matrix, B is nxn (packed upper half is stored) inline void gesymma(const unsigned m, const unsigned n, - const double* const A, const double* const B, - const double d, double* const C) + const double* const A, const double* const B, + const double d, double* const C) { for(unsigned i=0;i<m;i++){ for(unsigned j=0;j<n;j++){ - for(unsigned k=j;k<n;k++){ - if(j==k) - C[j*m+i] += d*A[i+k*m]*B[k+j*(j+1)/2]; - else{ - C[j*m+i] += d*A[i+k*m]*B[j+k*(k+1)/2]; - C[k*m+i] += d*A[i+j*m]*B[j+k*(k+1)/2]; - } - } + for(unsigned k=j;k<n;k++){ + if(j==k) + C[j*m+i] += d*A[i+k*m]*B[k+j*(j+1)/2]; + else{ + C[j*m+i] += d*A[i+k*m]*B[j+k*(k+1)/2]; + C[k*m+i] += d*A[i+j*m]*B[j+k*(k+1)/2]; + } + } } } } inline void gesymma(const unsigned m, const unsigned n, - const float* const A, const float* const B, - const float d, float* const C) + const float* const A, const float* const B, + const float d, float* const C) { for(unsigned i=0;i<m;i++){ for(unsigned j=0;j<n;j++){ - for(unsigned k=j;k<n;k++){ - if(j==k) - C[j*m+i] += d*A[i+k*m]*B[k+j*(j+1)/2]; - else{ - C[j*m+i] += d*A[i+k*m]*B[j+k*(k+1)/2]; - C[k*m+i] += d*A[i+j*m]*B[j+k*(k+1)/2]; - } - } + for(unsigned k=j;k<n;k++){ + if(j==k) + C[j*m+i] += d*A[i+k*m]*B[k+j*(j+1)/2]; + else{ + C[j*m+i] += d*A[i+k*m]*B[j+k*(k+1)/2]; + C[k*m+i] += d*A[i+j*m]*B[j+k*(k+1)/2]; + } + } } } } // C += d A^H A, C is a symm. matrix (packed upper half is stored), A is mxn inline void symhm(const unsigned m, const unsigned n, const double* const A, - const double d, double* C) + const double d, double* C) { for (unsigned j=0; j<n; ++j) { for (unsigned i=0; i<=j; ++i) { - double sum = 0.0; - for (unsigned k=0; k<m; ++k) sum += A[k+i*m] * A[k+j*m]; - C[i+j*(j+1)/2] += d * sum; + double sum = 0.0; + for (unsigned k=0; k<m; ++k) sum += A[k+i*m] * A[k+j*m]; + C[i+j*(j+1)/2] += d * sum; } } } inline void symhm(const unsigned m, const unsigned n, const float* const A, - const float d, float* C) + const float d, float* C) { for (unsigned j=0; j<n; ++j) { for (unsigned i=0; i<=j; ++i) { - float sum = 0.0; - for (unsigned k=0; k<m; ++k) sum += A[k+i*m] * A[k+j*m]; - C[i+j*(j+1)/2] += d * sum; + float sum = 0.0; + for (unsigned k=0; k<m; ++k) sum += A[k+i*m] * A[k+j*m]; + C[i+j*(j+1)/2] += d * sum; } } } @@ -698,8 +698,8 @@ namespace blas { for (unsigned k=0; k<n; ++k) { for (unsigned j=0; j<=n; ++j) { - double e = d * A[j+k*m]; - for (unsigned i=0; i<j; ++i) C[i+j*(j+1)/2] += e * A[i+k*m]; + double e = d * A[j+k*m]; + for (unsigned i=0; i<j; ++i) C[i+j*(j+1)/2] += e * A[i+k*m]; } } } @@ -707,59 +707,59 @@ namespace blas { for (unsigned k=0; k<n; ++k) { for (unsigned j=0; j<n; ++j) { - float e = d * A[j+k*m]; - for (unsigned i=0; i<=j; ++i) C[i+j*(j+1)/2] += e * A[i+k*m]; + float e = d * A[j+k*m]; + for (unsigned i=0; i<=j; ++i) C[i+j*(j+1)/2] += e * A[i+k*m]; } } } // Singular Value Decomposition inline int gesvdS(unsigned m, unsigned n, double* A, double* S, - double* U, unsigned ldU, double* VT, unsigned ldVT, - unsigned nwk, double* wk) + double* U, unsigned ldU, double* VT, unsigned ldVT, + unsigned nwk, double* wk) { int INF; dgesvd_(JOB_STR+3, JOB_STR+3, &m, &n, A, &m, S, U, &ldU, VT, &ldVT, - wk, &nwk, &INF); + wk, &nwk, &INF); return INF; } inline int gesvd(unsigned m, unsigned n, double* A, double* S, - double* VT, unsigned ldVT, unsigned nwk, double* wk) + double* VT, unsigned ldVT, unsigned nwk, double* wk) { int INF; dgesvd_(JOB_STR+2, JOB_STR+3, &m, &n, A, &m, S, A, &m, VT, &ldVT, - wk, &nwk, &INF); + wk, &nwk, &INF); return INF; } inline int gesvd(unsigned m, unsigned n, float* A, float* S, - float* VT, unsigned ldVT, unsigned nwk, float* wk) + float* VT, unsigned ldVT, unsigned nwk, float* wk) { int INF; sgesvd_(JOB_STR+2, JOB_STR+3, &m, &n, A, &m, S, A, &m, VT, &ldVT, - wk, &nwk, &INF); + wk, &nwk, &INF); return INF; } inline int gesvd(unsigned m, unsigned n, double* A, double* S, - double* U, unsigned ldU, double* VT, unsigned ldVT, - unsigned nwk, double* wk) + double* U, unsigned ldU, double* VT, unsigned ldVT, + unsigned nwk, double* wk) { int INF; dgesvd_(JOB_STR+9, JOB_STR+9, &m, &n, A, &m, S, U, &ldU, VT, &ldVT, - wk, &nwk, &INF); + wk, &nwk, &INF); return INF; } // compute singular values inline int svals(unsigned m, unsigned n, double* A, double* S, - unsigned nwk, double* wk) + unsigned nwk, double* wk) { int INF; dgesvd_(JOB_STR, JOB_STR, &m, &n, A, &m, S, A, &m, A, &n, wk, &nwk, &INF); return INF; } inline int svals(unsigned m, unsigned n, float* A, float* S, - unsigned nwk, float* wk) + unsigned nwk, float* wk) { int INF; sgesvd_(JOB_STR, JOB_STR, &m, &n, A, &m, S, A, &m, A, &n, wk, &nwk, &INF); @@ -802,28 +802,28 @@ namespace blas // QR factorisation inline int geqrf(const unsigned m, const unsigned n, double* A, - double* tau, unsigned nwk, double* wk) + double* tau, unsigned nwk, double* wk) { int INF; dgeqrf_(&m, &n, A, &m, tau, wk, &nwk, &INF); return INF; } inline int geqrf(const unsigned m, const unsigned n, float* A, - float* tau, unsigned nwk, float* wk) + float* tau, unsigned nwk, float* wk) { int INF; sgeqrf_(&m, &n, A, &m, tau, wk, &nwk, &INF); return INF; } inline int geqrf(const unsigned m, const unsigned n, double* A, - const unsigned ldA, double* tau, unsigned nwk, double* wk) + const unsigned ldA, double* tau, unsigned nwk, double* wk) { int INF; dgeqrf_(&m, &n, A, &ldA, tau, wk, &nwk, &INF); return INF; } inline int geqrf(const unsigned m, const unsigned n, float* A, - const unsigned ldA, float* tau, unsigned nwk, float* wk) + const unsigned ldA, float* tau, unsigned nwk, float* wk) { int INF; sgeqrf_(&m, &n, A, &ldA, tau, wk, &nwk, &INF); @@ -832,136 +832,136 @@ namespace blas // Multiply a general Matrix with the Q-Matrix (QR factorization), Q C inline int ormqr(const unsigned m, const unsigned n, const unsigned p, - double* A, double* tau, double* C, - unsigned nwk, double* wk) + double* A, double* tau, double* C, + unsigned nwk, double* wk) { int INF; dormqr_(JOB_STR+6, JOB_STR, &m, &n, &p, A, &m, tau, C, &m, wk, &nwk, &INF); return INF; } inline int ormqr(const unsigned m, const unsigned n, const unsigned p, - float* A, float* tau, float* C, - unsigned nwk, float* wk) + float* A, float* tau, float* C, + unsigned nwk, float* wk) { int INF; sormqr_(JOB_STR+6, JOB_STR, &m, &n, &p, A, &m, tau, C, &m, wk, &nwk, &INF); return INF; } inline int ormqr(const unsigned m, const unsigned n, const unsigned p, - double* A, const unsigned ldA, double* tau, double* C, - const unsigned ldC, unsigned nwk, double* wk) + double* A, const unsigned ldA, double* tau, double* C, + const unsigned ldC, unsigned nwk, double* wk) { int INF; dormqr_(JOB_STR+6, JOB_STR, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, - &INF); + &INF); return INF; } inline int ormqr(const unsigned m, const unsigned n, const unsigned p, - float* A, const unsigned ldA, float* tau, float* C, - const unsigned ldC, unsigned nwk, float* wk) + float* A, const unsigned ldA, float* tau, float* C, + const unsigned ldC, unsigned nwk, float* wk) { int INF; sormqr_(JOB_STR+6, JOB_STR, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, - &INF); + &INF); return INF; } // Q^H C inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, - double* A, const unsigned ldA, double* tau, double* C, - const unsigned ldC, unsigned nwk, double* wk) + double* A, const unsigned ldA, double* tau, double* C, + const unsigned ldC, unsigned nwk, double* wk) { int INF; dormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, - &INF); + &INF); return INF; } inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, - float* A, const unsigned ldA, float* tau, float* C, - const unsigned ldC, unsigned nwk, float* wk) + float* A, const unsigned ldA, float* tau, float* C, + const unsigned ldC, unsigned nwk, float* wk) { int INF; sormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, - &INF); + &INF); return INF; } inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, - double* A, const unsigned ldA, double* tau, double* C, - unsigned nwk, double* wk) + double* A, const unsigned ldA, double* tau, double* C, + unsigned nwk, double* wk) { int INF; dormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &ldA, tau, C, &m, wk, &nwk, - &INF); + &INF); return INF; } inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, - float* A, const unsigned ldA, float* tau, float* C, - unsigned nwk, float* wk) + float* A, const unsigned ldA, float* tau, float* C, + unsigned nwk, float* wk) { int INF; sormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &ldA, tau, C, &m, wk, &nwk, - &INF); + &INF); return INF; } inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, - double* A, double* tau, double* C, - unsigned nwk, double* wk) + double* A, double* tau, double* C, + unsigned nwk, double* wk) { int INF; dormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &m, tau, C, &m, wk, &nwk, &INF); return INF; } inline int ormqrh(const unsigned m, const unsigned n, const unsigned p, - float* A, float* tau, float* C, - unsigned nwk, float* wk) + float* A, float* tau, float* C, + unsigned nwk, float* wk) { int INF; sormqr_(JOB_STR+6, JOB_STR+1, &m, &n, &p, A, &m, tau, C, &m, wk, &nwk, &INF); return INF; } inline int morqr(const unsigned m, const unsigned n, const unsigned p, - double* A, const unsigned ldA, double* tau, double* C, - const unsigned ldC, unsigned nwk, double* wk) + double* A, const unsigned ldA, double* tau, double* C, + const unsigned ldC, unsigned nwk, double* wk) { int INF; dormqr_(JOB_STR+8, JOB_STR, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, - &INF); + &INF); return INF; } inline int morqr(const unsigned m, const unsigned n, const unsigned p, - float* A, const unsigned ldA, float* tau, float* C, - const unsigned ldC, unsigned nwk, float* wk) + float* A, const unsigned ldA, float* tau, float* C, + const unsigned ldC, unsigned nwk, float* wk) { int INF; sormqr_(JOB_STR+8, JOB_STR, &m, &n, &p, A, &ldA, tau, C, &ldC, wk, &nwk, - &INF); + &INF); return INF; } inline void ger(unsigned M, unsigned N, double d, double* X, unsigned INCX, - double* y, unsigned INCY, double* A, unsigned LDA) + double* y, unsigned INCY, double* A, unsigned LDA) { dger_(&M, &N, &d, X, &INCX, y, &INCY, A, &LDA); } inline void ger(unsigned M, unsigned N, float d, float* X, unsigned INCX, - float* y, unsigned INCY, float* A, unsigned LDA) + float* y, unsigned INCY, float* A, unsigned LDA) { sger_(&M, &N, &d, X, &INCX, y, &INCY, A, &LDA); } // return Q-Matrix (QR factorization) in A inline int orgqr(const unsigned m, const unsigned n, double* A, double* tau, - unsigned nwk, double* wk) + unsigned nwk, double* wk) { int INF; dorgqr_(&m, &n, &n, A, &m, tau, wk, &nwk, &INF); return INF; } inline int orgqr(const unsigned m, const unsigned n, float* A, float* tau, - unsigned nwk, float* wk) + unsigned nwk, float* wk) { int INF; sorgqr_(&m, &n, &n, A, &m, tau, wk, &nwk, &INF); @@ -980,10 +980,10 @@ namespace blas // product of an upper triangular matrix U and a matrix A, A:=U A inline void utrgemm(unsigned m, unsigned n, double* U, unsigned ldU, - double* A, unsigned ldA) + double* A, unsigned ldA) { dtrmm_(JOB_STR+6, JOB_STR+5, JOB_STR, JOB_STR, &m, &n, &D_ONE, U, &ldU, - A, &ldA); + A, &ldA); } @@ -993,9 +993,9 @@ namespace blas { for (unsigned j=0; j<n; ++j) { for (unsigned l=j; l<n; ++l) { - unsigned idl = l*(l+1)/2; - double e = d * U[j+idl]; - for (unsigned i=0; i<=j; ++i) A[i+j*(j+1)/2] += e * U[i+idl]; + unsigned idl = l*(l+1)/2; + double e = d * U[j+idl]; + for (unsigned i=0; i<=j; ++i) A[i+j*(j+1)/2] += e * U[i+idl]; } } } @@ -1004,9 +1004,9 @@ namespace blas { for (unsigned j=0; j<n; ++j) { for (unsigned l=j; l<n; ++l) { - unsigned idl = l*(l+1)/2; - float e = d * U[j+idl]; - for (unsigned i=0; i<=j; ++i) A[i+j*(j+1)/2] += e * U[i+idl]; + unsigned idl = l*(l+1)/2; + float e = d * U[j+idl]; + for (unsigned i=0; i<=j; ++i) A[i+j*(j+1)/2] += e * U[i+idl]; } } } @@ -1195,14 +1195,14 @@ namespace lapack // lower triangular solve inline void ltrs(const unsigned n, double* A, - const unsigned p, double* B, const unsigned ldB) + const unsigned p, double* B, const unsigned ldB) { // dtptrs_(JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, A, B, &ldB, &inf); for (unsigned i=0; i<p; ++i) dtpsv_(JOB_STR+6, JOB_STR, JOB_STR+5, &n, A, B+i*ldB, &N_ONE); } inline void ltrs(const unsigned n, float* A, - const unsigned p, float* B, const unsigned ldB) + const unsigned p, float* B, const unsigned ldB) { // stptrs_(JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, A, B, &ldB, &inf); for (unsigned i=0; i<p; ++i) @@ -1211,14 +1211,14 @@ namespace lapack // lower triangular transpose solve inline void ltrhs(const unsigned n, double* A, - const unsigned p, double* B, const unsigned ldB) + const unsigned p, double* B, const unsigned ldB) { // dtptrs_(JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, &p, A, B, &ldB, &inf); for (unsigned i=0; i<p; ++i) dtpsv_(JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, A, B+i*ldB, &N_ONE); } inline void ltrhs(const unsigned n, float* A, - const unsigned p, float* B, const unsigned ldB) + const unsigned p, float* B, const unsigned ldB) { // stptrs_(JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, &p, A, B, &ldB, &inf); for (unsigned i=0; i<p; ++i) @@ -1228,84 +1228,84 @@ namespace lapack // unit upper triangular solve (with L and R stored in one matrix) // XR=B, R is pxp, B is nxp inline void utrcs(const unsigned p, const double* LR, const unsigned ldLR, - const unsigned n, double* X, const unsigned ldX) + const unsigned n, double* X, const unsigned ldX) { dtrsm_(JOB_STR+8, JOB_STR+5, JOB_STR, JOB_STR+5, &n, &p, &D_ONE, - LR, &ldLR, X, &ldX); + LR, &ldLR, X, &ldX); } inline void utrcs(const unsigned p, const float* LR, const unsigned ldLR, - const unsigned n, float* X, const unsigned ldX) + const unsigned n, float* X, const unsigned ldX) { strsm_(JOB_STR+8, JOB_STR+5, JOB_STR, JOB_STR+5, &n, &p, &S_ONE, - LR, &ldLR, X, &ldX); + LR, &ldLR, X, &ldX); } // unit upper triangular solve (with L and R stored in one matrix) // RX=B, R is nxn, B is nxp inline void utlcs(const unsigned n, float* LR, const unsigned ldLR, - const unsigned p, float* X, const unsigned ldX) + const unsigned p, float* X, const unsigned ldX) { strsm_(JOB_STR+6, JOB_STR+5, JOB_STR, JOB_STR+5, &n, &p, &S_ONE, - LR, &ldLR, X, &ldX); + LR, &ldLR, X, &ldX); } inline void utlcs(const unsigned n, double* LR, const unsigned ldLR, - const unsigned p, double* X, const unsigned ldX) + const unsigned p, double* X, const unsigned ldX) { dtrsm_(JOB_STR+6, JOB_STR+5, JOB_STR, JOB_STR+5, &n, &p, &D_ONE, - LR, &ldLR, X, &ldX); + LR, &ldLR, X, &ldX); } // unit lower triangular solve (with L and R stored in one matrix) // XL=B, L is pxp, B is nxp inline void ltrcs(const unsigned p, float* LR, const unsigned ldLR, - const unsigned n, float* X, const unsigned ldX) + const unsigned n, float* X, const unsigned ldX) { strsm_(JOB_STR+8, JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, &S_ONE, - LR, &ldLR, X, &ldX); + LR, &ldLR, X, &ldX); } inline void ltrcs(const unsigned p, double* LR, const unsigned ldLR, - const unsigned n, double* X, const unsigned ldX) + const unsigned n, double* X, const unsigned ldX) { dtrsm_(JOB_STR+8, JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, &D_ONE, - LR, &ldLR, X, &ldX); + LR, &ldLR, X, &ldX); } // unit lower triangular transposed solve (with L and R stored in one matrix) // XL^T=B, L is pxp, B is nxp inline void ltrhcs(const unsigned p, double* LR, const unsigned ldLR, - const unsigned n, double* X, const unsigned ldX) + const unsigned n, double* X, const unsigned ldX) { dtrsm_(JOB_STR+8, JOB_STR+6, JOB_STR+1, JOB_STR+5, &n, &p, &D_ONE, - LR, &ldLR, X, &ldX); + LR, &ldLR, X, &ldX); } // unit lower triangular solve (with L and R stored in one matrix) // LX=B, L is nxn, B is nxp inline void ltlcs(const unsigned n, float* LR, const unsigned ldLR, - const unsigned p, float* X, const unsigned ldX) + const unsigned p, float* X, const unsigned ldX) { strsm_(JOB_STR+6, JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, &S_ONE, - LR, &ldLR, X, &ldX); + LR, &ldLR, X, &ldX); } inline void ltlcs(const unsigned n, double* LR, const unsigned ldLR, - const unsigned p, double* X, const unsigned ldX) + const unsigned p, double* X, const unsigned ldX) { dtrsm_(JOB_STR+6, JOB_STR+6, JOB_STR, JOB_STR+5, &n, &p, &D_ONE, - LR, &ldLR, X, &ldX); + LR, &ldLR, X, &ldX); } // upper triangular solve inline void utrs(const unsigned n, double* A, - const unsigned p, double* B, const unsigned ldB) + const unsigned p, double* B, const unsigned ldB) { // dtptrs_(JOB_STR+5, JOB_STR, JOB_STR, &n, &p, A, B, &ldB, &inf); for (unsigned i=0; i<p; ++i) dtpsv_(JOB_STR+5, JOB_STR, JOB_STR, &n, A, B+i*ldB, &N_ONE); } inline void utrs(const unsigned n, float* A, - const unsigned p, float* B, const unsigned ldB) + const unsigned p, float* B, const unsigned ldB) { //stptrs_(JOB_STR+5, JOB_STR, JOB_STR, &n, &p, A, B, &ldB, &inf); for (unsigned i=0; i<p; ++i) @@ -1314,14 +1314,14 @@ namespace lapack // upper triangluar transpose solve inline void utrhs(const unsigned n, double* A, - const unsigned p, double* B, const unsigned ldB) + const unsigned p, double* B, const unsigned ldB) { //dtptrs_(JOB_STR+5, JOB_STR+1, JOB_STR, &n, &p, A, B, &ldB, &inf); for (unsigned i=0; i<p; ++i) dtpsv_(JOB_STR+5, JOB_STR+1, JOB_STR, &n, A, B+i*ldB, &N_ONE); } inline void utrhs(const unsigned n, float* A, - const unsigned p, float* B, const unsigned ldB) + const unsigned p, float* B, const unsigned ldB) { //stptrs_(JOB_STR+5, JOB_STR+1, JOB_STR, &n, &p, A, B, &ldB, &inf); for (unsigned i=0; i<p; ++i) diff --git a/MathLib/LinAlg/Sparse/CRSMatrix.h b/MathLib/LinAlg/Sparse/CRSMatrix.h index ef5b51412c7..e87e9acbbeb 100644 --- a/MathLib/LinAlg/Sparse/CRSMatrix.h +++ b/MathLib/LinAlg/Sparse/CRSMatrix.h @@ -33,77 +33,77 @@ template<typename FP_TYPE, typename IDX_TYPE> class CRSMatrix: public SparseMatrixBase<FP_TYPE, IDX_TYPE> { public: - typedef FP_TYPE FP_T; + typedef FP_TYPE FP_T; public: - explicit CRSMatrix(std::string const &fname) : - SparseMatrixBase<FP_TYPE, IDX_TYPE>(), - _row_ptr(NULL), _col_idx(NULL), _data(NULL) - { - std::ifstream in(fname.c_str(), std::ios::in | std::ios::binary); - if (in) { - CS_read(in, SparseMatrixBase<FP_TYPE, IDX_TYPE>::_n_rows, _row_ptr, _col_idx, _data); - SparseMatrixBase<FP_TYPE, IDX_TYPE>::_n_cols = SparseMatrixBase<FP_TYPE, IDX_TYPE>::_n_rows; - in.close(); - } else { - std::cout << "cannot open " << fname << std::endl; - } - } - - explicit CRSMatrix(IDX_TYPE n, IDX_TYPE *iA, IDX_TYPE *jA, FP_TYPE* A) : - SparseMatrixBase<FP_TYPE, IDX_TYPE>(n,n), - _row_ptr(iA), _col_idx(jA), _data(A) - {} - - explicit CRSMatrix(MatrixSparsityPattern const& mat_sparsity_pattern) : - SparseMatrixBase<FP_TYPE, IDX_TYPE>(mat_sparsity_pattern.getNRows(), - mat_sparsity_pattern.getNRows()), - _row_ptr(nullptr), _col_idx(nullptr), _data(nullptr) - { - // reserve memory for _row_ptr - _row_ptr = new IDX_TYPE [this->_n_rows + 1]; - // initialize _row_ptr - _row_ptr[0] = 0; - for (std::size_t row(0); row < this->_n_rows; row++) { - _row_ptr[row + 1] = _row_ptr[row] - + std::distance(mat_sparsity_pattern.getRowBeginIterator(row), - mat_sparsity_pattern.getRowEndIterator(row)); - } - - std::size_t const nnz = _row_ptr[this->_n_rows]; - // reserve memory for _col_idx - _col_idx = new IDX_TYPE [nnz]; - // fill _col_idx - for (std::size_t row(0); row < this->_n_rows; row++) { - std::copy(mat_sparsity_pattern.getRowBeginIterator(row), - mat_sparsity_pattern.getRowEndIterator(row), - &_col_idx[_row_ptr[row]]); - } - - // reserve memory for _data - _data = new FP_TYPE [nnz]; - setZero(); - } - - /// Reset data entries to zero. - virtual void setZero() - { - std::fill_n(_data, _row_ptr[this->_n_rows], 0); - } - - virtual ~CRSMatrix() - { - delete [] _row_ptr; - delete [] _col_idx; - delete [] _data; - } - - virtual void amux(FP_TYPE const d, - FP_TYPE const* const __restrict__ x, - FP_TYPE* __restrict__ y) const - { - amuxCRS<FP_TYPE, IDX_TYPE>(d, this->getNRows(), _row_ptr, _col_idx, _data, x, y); - } + explicit CRSMatrix(std::string const &fname) : + SparseMatrixBase<FP_TYPE, IDX_TYPE>(), + _row_ptr(NULL), _col_idx(NULL), _data(NULL) + { + std::ifstream in(fname.c_str(), std::ios::in | std::ios::binary); + if (in) { + CS_read(in, SparseMatrixBase<FP_TYPE, IDX_TYPE>::_n_rows, _row_ptr, _col_idx, _data); + SparseMatrixBase<FP_TYPE, IDX_TYPE>::_n_cols = SparseMatrixBase<FP_TYPE, IDX_TYPE>::_n_rows; + in.close(); + } else { + std::cout << "cannot open " << fname << std::endl; + } + } + + explicit CRSMatrix(IDX_TYPE n, IDX_TYPE *iA, IDX_TYPE *jA, FP_TYPE* A) : + SparseMatrixBase<FP_TYPE, IDX_TYPE>(n,n), + _row_ptr(iA), _col_idx(jA), _data(A) + {} + + explicit CRSMatrix(MatrixSparsityPattern const& mat_sparsity_pattern) : + SparseMatrixBase<FP_TYPE, IDX_TYPE>(mat_sparsity_pattern.getNRows(), + mat_sparsity_pattern.getNRows()), + _row_ptr(nullptr), _col_idx(nullptr), _data(nullptr) + { + // reserve memory for _row_ptr + _row_ptr = new IDX_TYPE [this->_n_rows + 1]; + // initialize _row_ptr + _row_ptr[0] = 0; + for (std::size_t row(0); row < this->_n_rows; row++) { + _row_ptr[row + 1] = _row_ptr[row] + + std::distance(mat_sparsity_pattern.getRowBeginIterator(row), + mat_sparsity_pattern.getRowEndIterator(row)); + } + + std::size_t const nnz = _row_ptr[this->_n_rows]; + // reserve memory for _col_idx + _col_idx = new IDX_TYPE [nnz]; + // fill _col_idx + for (std::size_t row(0); row < this->_n_rows; row++) { + std::copy(mat_sparsity_pattern.getRowBeginIterator(row), + mat_sparsity_pattern.getRowEndIterator(row), + &_col_idx[_row_ptr[row]]); + } + + // reserve memory for _data + _data = new FP_TYPE [nnz]; + setZero(); + } + + /// Reset data entries to zero. + virtual void setZero() + { + std::fill_n(_data, _row_ptr[this->_n_rows], 0); + } + + virtual ~CRSMatrix() + { + delete [] _row_ptr; + delete [] _col_idx; + delete [] _data; + } + + virtual void amux(FP_TYPE const d, + FP_TYPE const* const __restrict__ x, + FP_TYPE* __restrict__ y) const + { + amuxCRS<FP_TYPE, IDX_TYPE>(d, this->getNRows(), _row_ptr, _col_idx, _data, x, y); + } virtual void precondApply(FP_TYPE* /*x*/) const {} @@ -116,56 +116,56 @@ public: /** * This method inserts/overwrites a non-zero matrix entry. - * Precondition: the entry have to be in the sparsity pattern! + * Precondition: the entry have to be in the sparsity pattern! * @param row the row number * @param col the column number * @param val the value that should be set at pos row,col * @return true, if the entry is contained in the sparsity pattern, else false */ - bool setValue(IDX_TYPE row, IDX_TYPE col, FP_TYPE val) - { - assert(0 <= row && row < this->_n_rows); - - // linear search - for matrices with many entries per row binary search is much faster - const IDX_TYPE idx_end (_row_ptr[row+1]); - IDX_TYPE j(_row_ptr[row]), k; - - while (j<idx_end && (k=_col_idx[j]) <= col) { - if (k == col) { - _data[j] = val; - return true; - } - j++; - } - return false; - } + bool setValue(IDX_TYPE row, IDX_TYPE col, FP_TYPE val) + { + assert(0 <= row && row < this->_n_rows); + + // linear search - for matrices with many entries per row binary search is much faster + const IDX_TYPE idx_end (_row_ptr[row+1]); + IDX_TYPE j(_row_ptr[row]), k; + + while (j<idx_end && (k=_col_idx[j]) <= col) { + if (k == col) { + _data[j] = val; + return true; + } + j++; + } + return false; + } /** * This method adds value val to an existing matrix entry at position row,col. - * Precondition: the entry have to be in the sparsity pattern! + * Precondition: the entry have to be in the sparsity pattern! * @param row the row number * @param col the column number * @param val the value that should be set at pos row,col * @return true, if the entry is contained in the sparsity pattern, else false */ - bool addValue(IDX_TYPE row, IDX_TYPE col, FP_TYPE val) - { - assert(0 <= row && row < this->_n_rows); - - // linear search - for matrices with many entries per row binary search is much faster - const IDX_TYPE idx_end (_row_ptr[row+1]); - IDX_TYPE j(_row_ptr[row]), k; - - while (j<idx_end && (k=_col_idx[j]) <= col) { - if (k == col) { - #pragma omp atomic - _data[j] += val; - return true; - } - j++; - } - return false; - } + bool addValue(IDX_TYPE row, IDX_TYPE col, FP_TYPE val) + { + assert(0 <= row && row < this->_n_rows); + + // linear search - for matrices with many entries per row binary search is much faster + const IDX_TYPE idx_end (_row_ptr[row+1]); + IDX_TYPE j(_row_ptr[row]), k; + + while (j<idx_end && (k=_col_idx[j]) <= col) { + if (k == col) { + #pragma omp atomic + _data[j] += val; + return true; + } + j++; + } + return false; + } /** * This is an access operator to a non-zero matrix entry. If the value of @@ -174,22 +174,22 @@ public: * @param col the column number * @return The corresponding matrix entry or 0.0. */ - FP_TYPE getValue(IDX_TYPE row, IDX_TYPE col) - { - assert(0 <= row && row < this->_n_rows); - - // linear search - for matrices with many entries per row binary search is much faster - const IDX_TYPE idx_end (_row_ptr[row+1]); - IDX_TYPE j(_row_ptr[row]), k; - - while (j<idx_end && (k=_col_idx[j]) <= col) { - if (k == col) { - return _data[j]; - } - j++; - } - return 0.0; - } + FP_TYPE getValue(IDX_TYPE row, IDX_TYPE col) + { + assert(0 <= row && row < this->_n_rows); + + // linear search - for matrices with many entries per row binary search is much faster + const IDX_TYPE idx_end (_row_ptr[row+1]); + IDX_TYPE j(_row_ptr[row]), k; + + while (j<idx_end && (k=_col_idx[j]) <= col) { + if (k == col) { + return _data[j]; + } + j++; + } + return 0.0; + } /** * This is the constant access operator to a non-zero matrix entry. @@ -200,254 +200,254 @@ public: */ FP_TYPE operator() (IDX_TYPE row, IDX_TYPE col) const { - assert(0 <= row && row < this->_n_rows); - - // linear search - for matrices with many entries per row binary search is much faster - const IDX_TYPE idx_end (_row_ptr[row+1]); - IDX_TYPE j(_row_ptr[row]), k; - - while (j<idx_end && (k=_col_idx[j]) <= col) { - if (k == col) { - return _data[j]; - } - j++; - } - return 0.0; + assert(0 <= row && row < this->_n_rows); + + // linear search - for matrices with many entries per row binary search is much faster + const IDX_TYPE idx_end (_row_ptr[row+1]); + IDX_TYPE j(_row_ptr[row]), k; + + while (j<idx_end && (k=_col_idx[j]) <= col) { + if (k == col) { + return _data[j]; + } + j++; + } + return 0.0; } - /** - * get const access to the row pointer array of CRS matrix - * @return the index array _row_ptr - */ - IDX_TYPE const* getRowPtrArray() const { return _row_ptr; } - - /** - * get const access to the column index array of CRS matrix - * @return the index array _col_idx - */ - IDX_TYPE const* getColIdxArray ()const { return _col_idx; } - - /** - * get the matrix entries within an array of CRS matrix - */ - FP_TYPE const* getEntryArray() const { return _data; } - - /** - * erase rows and columns from sparse matrix - * @param n_rows_cols number of rows / columns to remove - * @param rows_cols sorted list of rows/columns that should be removed - */ - void eraseEntries(IDX_TYPE n_rows_cols, IDX_TYPE const* const rows_cols) - { - //*** remove the rows - removeRows(n_rows_cols, rows_cols); - //*** transpose - transpose(); - //*** remove columns in original means removing rows in the transposed - removeRows(n_rows_cols, rows_cols); - //*** transpose again - transpose(); - } - - /** - * get the j-th column of the sparse matrix - * @param j the column number that should be returned - * @param column_entries the column entries (have to be allocated - */ - void getColumn(IDX_TYPE j, FP_TYPE* column_entries) const - { - for (IDX_TYPE k(0); k < this->_n_rows; k++) { - const IDX_TYPE end_row(_row_ptr[k+1]); - IDX_TYPE i(_row_ptr[k+1]); - while (i<end_row && _col_idx[i] != j) { - i++; - } - if (i==end_row) { - column_entries[k] = 0.0; - } else { - column_entries[k] = _data[i]; - } - } - } - - CRSMatrix<FP_TYPE, IDX_TYPE>* getTranspose() const - { - CRSMatrix<FP_TYPE, IDX_TYPE>* transposed_mat(new CRSMatrix<FP_TYPE, IDX_TYPE>(*this)); - transposed_mat->transpose(); - return transposed_mat; - } + /** + * get const access to the row pointer array of CRS matrix + * @return the index array _row_ptr + */ + IDX_TYPE const* getRowPtrArray() const { return _row_ptr; } + + /** + * get const access to the column index array of CRS matrix + * @return the index array _col_idx + */ + IDX_TYPE const* getColIdxArray ()const { return _col_idx; } + + /** + * get the matrix entries within an array of CRS matrix + */ + FP_TYPE const* getEntryArray() const { return _data; } + + /** + * erase rows and columns from sparse matrix + * @param n_rows_cols number of rows / columns to remove + * @param rows_cols sorted list of rows/columns that should be removed + */ + void eraseEntries(IDX_TYPE n_rows_cols, IDX_TYPE const* const rows_cols) + { + //*** remove the rows + removeRows(n_rows_cols, rows_cols); + //*** transpose + transpose(); + //*** remove columns in original means removing rows in the transposed + removeRows(n_rows_cols, rows_cols); + //*** transpose again + transpose(); + } + + /** + * get the j-th column of the sparse matrix + * @param j the column number that should be returned + * @param column_entries the column entries (have to be allocated + */ + void getColumn(IDX_TYPE j, FP_TYPE* column_entries) const + { + for (IDX_TYPE k(0); k < this->_n_rows; k++) { + const IDX_TYPE end_row(_row_ptr[k+1]); + IDX_TYPE i(_row_ptr[k+1]); + while (i<end_row && _col_idx[i] != j) { + i++; + } + if (i==end_row) { + column_entries[k] = 0.0; + } else { + column_entries[k] = _data[i]; + } + } + } + + CRSMatrix<FP_TYPE, IDX_TYPE>* getTranspose() const + { + CRSMatrix<FP_TYPE, IDX_TYPE>* transposed_mat(new CRSMatrix<FP_TYPE, IDX_TYPE>(*this)); + transposed_mat->transpose(); + return transposed_mat; + } protected: - CRSMatrix(CRSMatrix const& rhs) : - SparseMatrixBase<FP_TYPE, IDX_TYPE> (rhs.getNRows(), rhs.getNCols()), - _row_ptr(new IDX_TYPE[rhs.getNRows() + 1]), _col_idx(new IDX_TYPE[rhs.getNNZ()]), - _data(new FP_TYPE[rhs.getNNZ()]) - { - // copy the data - IDX_TYPE const* row_ptr(rhs.getRowPtrArray()); - for (IDX_TYPE k(0); k <= this->_n_rows; k++) { - _row_ptr[k] = row_ptr[k]; - } - - IDX_TYPE nnz(rhs.getNNZ()); - IDX_TYPE const*const col_idx(rhs.getColIdxArray()); - for (IDX_TYPE k(0); k<nnz; k++) { - _col_idx[k] = col_idx[k]; - } - - FP_TYPE const*const data(rhs.getEntryArray()); - for (IDX_TYPE k(0); k<nnz; k++) { - _data[k] = data[k]; - } - } - - void removeRows (IDX_TYPE n_rows_cols, IDX_TYPE const*const rows) - { - //*** determine the number of new rows and the number of entries without the rows - const IDX_TYPE n_new_rows(this->_n_rows - n_rows_cols); - IDX_TYPE *row_ptr_new(new IDX_TYPE[n_new_rows+1]); - row_ptr_new[0] = 0; - IDX_TYPE row_cnt (1), erase_row_cnt(0); - for (unsigned k(0); k < this->_n_rows; k++) { - if (erase_row_cnt < n_rows_cols) { - if (k != rows[erase_row_cnt]) { - row_ptr_new[row_cnt] = _row_ptr[k+1] - _row_ptr[k]; - row_cnt++; - } else { - erase_row_cnt++; - } - } else { - row_ptr_new[row_cnt] = _row_ptr[k+1] - _row_ptr[k]; - row_cnt++; - } - } - - //*** sum up the entries - for (IDX_TYPE k(0); k<n_new_rows; k++) { - row_ptr_new[k+1] = row_ptr_new[k+1] + row_ptr_new[k]; - } - - //*** create new memory for col_idx and data - IDX_TYPE nnz_new(row_ptr_new[n_new_rows]); - IDX_TYPE *col_idx_new (new IDX_TYPE[nnz_new]); - FP_TYPE *data_new (new FP_TYPE[nnz_new]); - - //*** copy the entries - // initialization - IDX_TYPE *row_ptr_new_tmp(new IDX_TYPE[n_new_rows+1]); - for (unsigned k(0); k<=n_new_rows; k++) { - row_ptr_new_tmp[k] = row_ptr_new[k]; - } - erase_row_cnt = 0; - row_cnt = 0; - // copy column index and data entries - for (IDX_TYPE k(0); k < this->_n_rows; k++) { - if (erase_row_cnt < n_rows_cols) { - if (k != rows[erase_row_cnt]) { - const IDX_TYPE end (_row_ptr[k+1]); - // walk through row - for (IDX_TYPE j(_row_ptr[k]); j<end; j++) { - col_idx_new[row_ptr_new_tmp[row_cnt]] = _col_idx[j]; - data_new[row_ptr_new_tmp[row_cnt]] = _data[j]; - row_ptr_new_tmp[row_cnt]++; - } - row_cnt++; - } else { - erase_row_cnt++; - } - } else { - const IDX_TYPE end (_row_ptr[k+1]); - // walk through row - for (IDX_TYPE j(_row_ptr[k]); j<end; j++) { - col_idx_new[row_ptr_new_tmp[row_cnt]] = _col_idx[j]; - data_new[row_ptr_new_tmp[row_cnt]] = _data[j]; - row_ptr_new_tmp[row_cnt]++; - } - row_cnt++; - } - } - - this->_n_rows -= n_rows_cols; - std::swap (row_ptr_new, _row_ptr); - std::swap (col_idx_new, _col_idx); - std::swap (data_new, _data); - - delete [] row_ptr_new_tmp; - delete [] row_ptr_new; - delete [] col_idx_new; - delete [] data_new; - } - - void transpose () - { - // create a helper array row_ptr_nnz - IDX_TYPE *row_ptr_nnz(new IDX_TYPE[this->_n_cols+1]); - for (IDX_TYPE k(0); k <= this->_n_cols; k++) { - row_ptr_nnz[k] = 0; - } - - // count entries per row in the transposed matrix - IDX_TYPE nnz(_row_ptr[this->_n_rows]); - for (IDX_TYPE k(0); k < nnz; k++) { - row_ptr_nnz[_col_idx[k]]++; - } - - // create row_ptr_trans - IDX_TYPE *row_ptr_trans(new IDX_TYPE[this->_n_cols + 1]); - row_ptr_trans[0] = 0; - for (IDX_TYPE k(0); k < this->_n_cols; k++) { - row_ptr_trans[k+1] = row_ptr_trans[k] + row_ptr_nnz[k]; - } - - // make a copy of row_ptr_trans - for (IDX_TYPE k(0); k <= this->_n_cols; k++) { - row_ptr_nnz[k] = row_ptr_trans[k]; - } - - // create arrays col_idx_trans and data_trans - assert(nnz == row_ptr_trans[this->_n_cols]); - IDX_TYPE *col_idx_trans(new IDX_TYPE[nnz]); - FP_TYPE *data_trans(new FP_TYPE[nnz]); - - // fill arrays col_idx_trans and data_trans - for (IDX_TYPE i(0); i < this->_n_rows; i++) { - const IDX_TYPE row_end(_row_ptr[i + 1]); - for (IDX_TYPE j(_row_ptr[i]); j < row_end; j++) { - const IDX_TYPE k(_col_idx[j]); - col_idx_trans[row_ptr_nnz[k]] = i; - data_trans[row_ptr_nnz[k]] = _data[j]; - row_ptr_nnz[k]++; - } - } - - std::swap(this->_n_rows, this->_n_cols); - std::swap(row_ptr_trans, _row_ptr); - std::swap(col_idx_trans, _col_idx); - std::swap(data_trans, _data); - - delete[] row_ptr_nnz; - delete[] row_ptr_trans; - delete[] col_idx_trans; - delete[] data_trans; - } + CRSMatrix(CRSMatrix const& rhs) : + SparseMatrixBase<FP_TYPE, IDX_TYPE> (rhs.getNRows(), rhs.getNCols()), + _row_ptr(new IDX_TYPE[rhs.getNRows() + 1]), _col_idx(new IDX_TYPE[rhs.getNNZ()]), + _data(new FP_TYPE[rhs.getNNZ()]) + { + // copy the data + IDX_TYPE const* row_ptr(rhs.getRowPtrArray()); + for (IDX_TYPE k(0); k <= this->_n_rows; k++) { + _row_ptr[k] = row_ptr[k]; + } + + IDX_TYPE nnz(rhs.getNNZ()); + IDX_TYPE const*const col_idx(rhs.getColIdxArray()); + for (IDX_TYPE k(0); k<nnz; k++) { + _col_idx[k] = col_idx[k]; + } + + FP_TYPE const*const data(rhs.getEntryArray()); + for (IDX_TYPE k(0); k<nnz; k++) { + _data[k] = data[k]; + } + } + + void removeRows (IDX_TYPE n_rows_cols, IDX_TYPE const*const rows) + { + //*** determine the number of new rows and the number of entries without the rows + const IDX_TYPE n_new_rows(this->_n_rows - n_rows_cols); + IDX_TYPE *row_ptr_new(new IDX_TYPE[n_new_rows+1]); + row_ptr_new[0] = 0; + IDX_TYPE row_cnt (1), erase_row_cnt(0); + for (unsigned k(0); k < this->_n_rows; k++) { + if (erase_row_cnt < n_rows_cols) { + if (k != rows[erase_row_cnt]) { + row_ptr_new[row_cnt] = _row_ptr[k+1] - _row_ptr[k]; + row_cnt++; + } else { + erase_row_cnt++; + } + } else { + row_ptr_new[row_cnt] = _row_ptr[k+1] - _row_ptr[k]; + row_cnt++; + } + } + + //*** sum up the entries + for (IDX_TYPE k(0); k<n_new_rows; k++) { + row_ptr_new[k+1] = row_ptr_new[k+1] + row_ptr_new[k]; + } + + //*** create new memory for col_idx and data + IDX_TYPE nnz_new(row_ptr_new[n_new_rows]); + IDX_TYPE *col_idx_new (new IDX_TYPE[nnz_new]); + FP_TYPE *data_new (new FP_TYPE[nnz_new]); + + //*** copy the entries + // initialization + IDX_TYPE *row_ptr_new_tmp(new IDX_TYPE[n_new_rows+1]); + for (unsigned k(0); k<=n_new_rows; k++) { + row_ptr_new_tmp[k] = row_ptr_new[k]; + } + erase_row_cnt = 0; + row_cnt = 0; + // copy column index and data entries + for (IDX_TYPE k(0); k < this->_n_rows; k++) { + if (erase_row_cnt < n_rows_cols) { + if (k != rows[erase_row_cnt]) { + const IDX_TYPE end (_row_ptr[k+1]); + // walk through row + for (IDX_TYPE j(_row_ptr[k]); j<end; j++) { + col_idx_new[row_ptr_new_tmp[row_cnt]] = _col_idx[j]; + data_new[row_ptr_new_tmp[row_cnt]] = _data[j]; + row_ptr_new_tmp[row_cnt]++; + } + row_cnt++; + } else { + erase_row_cnt++; + } + } else { + const IDX_TYPE end (_row_ptr[k+1]); + // walk through row + for (IDX_TYPE j(_row_ptr[k]); j<end; j++) { + col_idx_new[row_ptr_new_tmp[row_cnt]] = _col_idx[j]; + data_new[row_ptr_new_tmp[row_cnt]] = _data[j]; + row_ptr_new_tmp[row_cnt]++; + } + row_cnt++; + } + } + + this->_n_rows -= n_rows_cols; + std::swap (row_ptr_new, _row_ptr); + std::swap (col_idx_new, _col_idx); + std::swap (data_new, _data); + + delete [] row_ptr_new_tmp; + delete [] row_ptr_new; + delete [] col_idx_new; + delete [] data_new; + } + + void transpose () + { + // create a helper array row_ptr_nnz + IDX_TYPE *row_ptr_nnz(new IDX_TYPE[this->_n_cols+1]); + for (IDX_TYPE k(0); k <= this->_n_cols; k++) { + row_ptr_nnz[k] = 0; + } + + // count entries per row in the transposed matrix + IDX_TYPE nnz(_row_ptr[this->_n_rows]); + for (IDX_TYPE k(0); k < nnz; k++) { + row_ptr_nnz[_col_idx[k]]++; + } + + // create row_ptr_trans + IDX_TYPE *row_ptr_trans(new IDX_TYPE[this->_n_cols + 1]); + row_ptr_trans[0] = 0; + for (IDX_TYPE k(0); k < this->_n_cols; k++) { + row_ptr_trans[k+1] = row_ptr_trans[k] + row_ptr_nnz[k]; + } + + // make a copy of row_ptr_trans + for (IDX_TYPE k(0); k <= this->_n_cols; k++) { + row_ptr_nnz[k] = row_ptr_trans[k]; + } + + // create arrays col_idx_trans and data_trans + assert(nnz == row_ptr_trans[this->_n_cols]); + IDX_TYPE *col_idx_trans(new IDX_TYPE[nnz]); + FP_TYPE *data_trans(new FP_TYPE[nnz]); + + // fill arrays col_idx_trans and data_trans + for (IDX_TYPE i(0); i < this->_n_rows; i++) { + const IDX_TYPE row_end(_row_ptr[i + 1]); + for (IDX_TYPE j(_row_ptr[i]); j < row_end; j++) { + const IDX_TYPE k(_col_idx[j]); + col_idx_trans[row_ptr_nnz[k]] = i; + data_trans[row_ptr_nnz[k]] = _data[j]; + row_ptr_nnz[k]++; + } + } + + std::swap(this->_n_rows, this->_n_cols); + std::swap(row_ptr_trans, _row_ptr); + std::swap(col_idx_trans, _col_idx); + std::swap(data_trans, _data); + + delete[] row_ptr_nnz; + delete[] row_ptr_trans; + delete[] col_idx_trans; + delete[] data_trans; + } #ifndef NDEBUG - void printMat() const - { - for (IDX_TYPE k(0); k < this->_n_rows; k++) { - std::cout << k << ": " << std::flush; - const IDX_TYPE row_end(_row_ptr[k+1]); - for (IDX_TYPE j(_row_ptr[k]); j<row_end; j++) { - std::cout << _col_idx[j] << " " << std::flush; - } - std::cout << std::endl; - } - } + void printMat() const + { + for (IDX_TYPE k(0); k < this->_n_rows; k++) { + std::cout << k << ": " << std::flush; + const IDX_TYPE row_end(_row_ptr[k+1]); + for (IDX_TYPE j(_row_ptr[k]); j<row_end; j++) { + std::cout << _col_idx[j] << " " << std::flush; + } + std::cout << std::endl; + } + } #endif - IDX_TYPE *_row_ptr; - IDX_TYPE *_col_idx; - FP_TYPE* _data; + IDX_TYPE *_row_ptr; + IDX_TYPE *_col_idx; + FP_TYPE* _data; }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/CRSMatrixDiagPrecond.h b/MathLib/LinAlg/Sparse/CRSMatrixDiagPrecond.h index 8c87d0944d0..cc00ec9cf2d 100644 --- a/MathLib/LinAlg/Sparse/CRSMatrixDiagPrecond.h +++ b/MathLib/LinAlg/Sparse/CRSMatrixDiagPrecond.h @@ -22,63 +22,63 @@ namespace MathLib { class CRSMatrixDiagPrecond : public CRSMatrix<double, unsigned> { public: - /** - * Constructor takes a file name. The file is read in binary format - * by the constructor of the base class (template) CRSMatrix. - * Further details you can see in function CS_read(). - * - * The user have to calculate the preconditioner explicit via calcPrecond() method! - * - * @param fname the name of the file that contains the matrix in - * binary compressed row storage format - */ - CRSMatrixDiagPrecond(std::string const &fname) : - CRSMatrix<double, unsigned> (fname), _inv_diag(NULL) - {} + /** + * Constructor takes a file name. The file is read in binary format + * by the constructor of the base class (template) CRSMatrix. + * Further details you can see in function CS_read(). + * + * The user have to calculate the preconditioner explicit via calcPrecond() method! + * + * @param fname the name of the file that contains the matrix in + * binary compressed row storage format + */ + CRSMatrixDiagPrecond(std::string const &fname) : + CRSMatrix<double, unsigned> (fname), _inv_diag(NULL) + {} - /** - * Constructs a matrix object from given data. - * - * The user have to calculate the preconditioner explicit via calcPrecond() method! - * @param n number of rows / columns of the matrix - * @param iA row pointer of matrix in compressed row storage format - * @param jA column index of matrix in compressed row storage format - * @param A data entries of matrix in compressed row storage format - */ - CRSMatrixDiagPrecond(unsigned n, unsigned *iA, unsigned *jA, double* A) : - CRSMatrix<double, unsigned> (n, iA, jA, A), _inv_diag(NULL) - {} + /** + * Constructs a matrix object from given data. + * + * The user have to calculate the preconditioner explicit via calcPrecond() method! + * @param n number of rows / columns of the matrix + * @param iA row pointer of matrix in compressed row storage format + * @param jA column index of matrix in compressed row storage format + * @param A data entries of matrix in compressed row storage format + */ + CRSMatrixDiagPrecond(unsigned n, unsigned *iA, unsigned *jA, double* A) : + CRSMatrix<double, unsigned> (n, iA, jA, A), _inv_diag(NULL) + {} - void calcPrecond() - { - delete [] _inv_diag; - _inv_diag = new double[_n_rows]; + void calcPrecond() + { + delete [] _inv_diag; + _inv_diag = new double[_n_rows]; - if (!generateDiagPrecond(_n_rows, _row_ptr, _col_idx, _data, _inv_diag)) { - std::cout << "Could not create diagonal preconditioner" << std::endl; - } -// if (!generateDiagPrecondRowSum(_n_rows, _row_ptr, _data, _inv_diag)) { -// std::cout << "Could not create diagonal preconditioner" << std::endl; -// } -// if (!generateDiagPrecondRowMax(_n_rows, _row_ptr, _data, _inv_diag)) { -// std::cout << "Could not create diagonal preconditioner" << std::endl; -// } + if (!generateDiagPrecond(_n_rows, _row_ptr, _col_idx, _data, _inv_diag)) { + std::cout << "Could not create diagonal preconditioner" << std::endl; + } +// if (!generateDiagPrecondRowSum(_n_rows, _row_ptr, _data, _inv_diag)) { +// std::cout << "Could not create diagonal preconditioner" << std::endl; +// } +// if (!generateDiagPrecondRowMax(_n_rows, _row_ptr, _data, _inv_diag)) { +// std::cout << "Could not create diagonal preconditioner" << std::endl; +// } - } + } - void precondApply(double* x) const - { - for (unsigned k=0; k<_n_rows; ++k) { - x[k] = _inv_diag[k]*x[k]; - } - } + void precondApply(double* x) const + { + for (unsigned k=0; k<_n_rows; ++k) { + x[k] = _inv_diag[k]*x[k]; + } + } - ~CRSMatrixDiagPrecond() - { - delete [] _inv_diag; - } + ~CRSMatrixDiagPrecond() + { + delete [] _inv_diag; + } private: - double *_inv_diag; + double *_inv_diag; }; } diff --git a/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h index 437c0bacdb6..66f4a04aaf1 100644 --- a/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h +++ b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h @@ -25,25 +25,25 @@ namespace MathLib { template<typename FP_TYPE, typename IDX_TYPE> class CRSMatrixOpenMP : public CRSMatrix<FP_TYPE, IDX_TYPE> { public: - CRSMatrixOpenMP(std::string const &fname) : - CRSMatrix<FP_TYPE, IDX_TYPE>(fname) - {} - - CRSMatrixOpenMP(unsigned n, IDX_TYPE *iA, IDX_TYPE *jA, FP_TYPE* A) : - CRSMatrix<FP_TYPE, IDX_TYPE>(n, iA, jA, A) - {} - - CRSMatrixOpenMP(unsigned n1) : - CRSMatrix<FP_TYPE, IDX_TYPE>(n1) - {} - - virtual ~CRSMatrixOpenMP() - {} - virtual void amux(FP_TYPE const d, FP_TYPE const* const __restrict__ x, - FP_TYPE* __restrict__ y) const - { - amuxCRSParallelOpenMP(d, this->_n_rows, this->_row_ptr, this->_col_idx, this->_data, x, y); - } + CRSMatrixOpenMP(std::string const &fname) : + CRSMatrix<FP_TYPE, IDX_TYPE>(fname) + {} + + CRSMatrixOpenMP(unsigned n, IDX_TYPE *iA, IDX_TYPE *jA, FP_TYPE* A) : + CRSMatrix<FP_TYPE, IDX_TYPE>(n, iA, jA, A) + {} + + CRSMatrixOpenMP(unsigned n1) : + CRSMatrix<FP_TYPE, IDX_TYPE>(n1) + {} + + virtual ~CRSMatrixOpenMP() + {} + virtual void amux(FP_TYPE const d, FP_TYPE const* const __restrict__ x, + FP_TYPE* __restrict__ y) const + { + amuxCRSParallelOpenMP(d, this->_n_rows, this->_row_ptr, this->_col_idx, this->_data, x, y); + } }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/CRSMatrixPThreads.h b/MathLib/LinAlg/Sparse/CRSMatrixPThreads.h index c8f3ea97c0b..377914bbcf1 100644 --- a/MathLib/LinAlg/Sparse/CRSMatrixPThreads.h +++ b/MathLib/LinAlg/Sparse/CRSMatrixPThreads.h @@ -27,79 +27,79 @@ namespace MathLib { template<class T> class CRSMatrixPThreads : public CRSMatrix<T,unsigned> { public: - CRSMatrixPThreads(std::string const &fname, unsigned num_of_threads) : - CRSMatrix<T,unsigned>(fname), _n_threads (num_of_threads), - _workload_intervals(new unsigned[num_of_threads+1]) - { - calcWorkload(); - } - - CRSMatrixPThreads(unsigned n, unsigned *iA, unsigned *jA, T* A, unsigned num_of_threads) : - CRSMatrix<T,unsigned>(n, iA, jA, A), _n_threads (num_of_threads), - _workload_intervals(new unsigned[num_of_threads+1]) - { - calcWorkload(); - } - - CRSMatrixPThreads(unsigned n1) : - CRSMatrix<T,unsigned>(n1), _n_threads (1), - _workload_intervals(new unsigned[_n_threads+1]) - { - calcWorkload(); - } - - virtual ~CRSMatrixPThreads() - { - delete [] _workload_intervals; - } - - virtual void amux(T d, T const * const x, T *y) const - { - amuxCRSParallelPThreads(d, SparseMatrixBase<T, unsigned>::_n_rows, - CRSMatrix<T, unsigned>::_row_ptr, CRSMatrix<T, unsigned>::_col_idx, - CRSMatrix<T, unsigned>::_data, x, y, _n_threads, _workload_intervals); - } + CRSMatrixPThreads(std::string const &fname, unsigned num_of_threads) : + CRSMatrix<T,unsigned>(fname), _n_threads (num_of_threads), + _workload_intervals(new unsigned[num_of_threads+1]) + { + calcWorkload(); + } + + CRSMatrixPThreads(unsigned n, unsigned *iA, unsigned *jA, T* A, unsigned num_of_threads) : + CRSMatrix<T,unsigned>(n, iA, jA, A), _n_threads (num_of_threads), + _workload_intervals(new unsigned[num_of_threads+1]) + { + calcWorkload(); + } + + CRSMatrixPThreads(unsigned n1) : + CRSMatrix<T,unsigned>(n1), _n_threads (1), + _workload_intervals(new unsigned[_n_threads+1]) + { + calcWorkload(); + } + + virtual ~CRSMatrixPThreads() + { + delete [] _workload_intervals; + } + + virtual void amux(T d, T const * const x, T *y) const + { + amuxCRSParallelPThreads(d, SparseMatrixBase<T, unsigned>::_n_rows, + CRSMatrix<T, unsigned>::_row_ptr, CRSMatrix<T, unsigned>::_col_idx, + CRSMatrix<T, unsigned>::_data, x, y, _n_threads, _workload_intervals); + } protected: - void calcWorkload() - { - _workload_intervals[0] = 0; - _workload_intervals[_n_threads] = SparseMatrixBase<T, unsigned>::_n_rows; - - const unsigned work_per_core (this->getNNZ()/_n_threads); - for (unsigned k(1); k<_n_threads; k++) { - unsigned upper_bound_kth_core(k * work_per_core); - // search in _row_ptr array for the appropriate index - unsigned beg (_workload_intervals[k-1]); - unsigned end (_workload_intervals[_n_threads]); - bool found (false); - while (beg < end && !found) { - unsigned m ((end+beg)/2); - - if (upper_bound_kth_core == this->_row_ptr[m]) { - _workload_intervals[k] = m; - found = true; - } else { - if (upper_bound_kth_core < this->_row_ptr[m]) { - end = m; - } else { - beg = m+1; - } - } - } - if (!found) - _workload_intervals[k] = beg; - } - - for (unsigned k(0); k<_n_threads; k++) { - std::cout << "proc " << k << ": [" << _workload_intervals[k] << "," << _workload_intervals[k+1] << ") - " - << _workload_intervals[k+1] - _workload_intervals[k] << " rows and " - << this->_row_ptr[_workload_intervals[k+1]] - this->_row_ptr[_workload_intervals[k]] << " entries" << std::endl; - } - } - - const unsigned _n_threads; - unsigned *_workload_intervals; + void calcWorkload() + { + _workload_intervals[0] = 0; + _workload_intervals[_n_threads] = SparseMatrixBase<T, unsigned>::_n_rows; + + const unsigned work_per_core (this->getNNZ()/_n_threads); + for (unsigned k(1); k<_n_threads; k++) { + unsigned upper_bound_kth_core(k * work_per_core); + // search in _row_ptr array for the appropriate index + unsigned beg (_workload_intervals[k-1]); + unsigned end (_workload_intervals[_n_threads]); + bool found (false); + while (beg < end && !found) { + unsigned m ((end+beg)/2); + + if (upper_bound_kth_core == this->_row_ptr[m]) { + _workload_intervals[k] = m; + found = true; + } else { + if (upper_bound_kth_core < this->_row_ptr[m]) { + end = m; + } else { + beg = m+1; + } + } + } + if (!found) + _workload_intervals[k] = beg; + } + + for (unsigned k(0); k<_n_threads; k++) { + std::cout << "proc " << k << ": [" << _workload_intervals[k] << "," << _workload_intervals[k+1] << ") - " + << _workload_intervals[k+1] - _workload_intervals[k] << " rows and " + << this->_row_ptr[_workload_intervals[k+1]] - this->_row_ptr[_workload_intervals[k]] << " entries" << std::endl; + } + } + + const unsigned _n_threads; + unsigned *_workload_intervals; }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/CRSSymMatrix.h b/MathLib/LinAlg/Sparse/CRSSymMatrix.h index f10bceab1ca..fafb9e6d3ed 100644 --- a/MathLib/LinAlg/Sparse/CRSSymMatrix.h +++ b/MathLib/LinAlg/Sparse/CRSSymMatrix.h @@ -20,54 +20,54 @@ template<class T> class CRSSymMatrix : public CRSMatrix<T> { public: - typedef T FP_T; + typedef T FP_T; public: - CRSSymMatrix(std::string const &fname) - : CRSMatrix<T> (fname) - { - unsigned nnz (0); + CRSSymMatrix(std::string const &fname) + : CRSMatrix<T> (fname) + { + unsigned nnz (0); - // count number of non-zeros in the upper triangular part - for (unsigned i = 0; i < SparseMatrixBase<T>::_n_rows; i++) { - unsigned idx = CRSMatrix<T>::_row_ptr[i+1]; - for (unsigned j = CRSMatrix<T>::_row_ptr[i]; j < idx; j++) - if (CRSMatrix<T>::_col_idx[j] >= i) - ++nnz; - } + // count number of non-zeros in the upper triangular part + for (unsigned i = 0; i < SparseMatrixBase<T>::_n_rows; i++) { + unsigned idx = CRSMatrix<T>::_row_ptr[i+1]; + for (unsigned j = CRSMatrix<T>::_row_ptr[i]; j < idx; j++) + if (CRSMatrix<T>::_col_idx[j] >= i) + ++nnz; + } - double *A_new (new double[nnz]); - unsigned *jA_new (new unsigned[nnz]); - unsigned *iA_new (new unsigned[SparseMatrixBase<T>::_n_rows+1]); + double *A_new (new double[nnz]); + unsigned *jA_new (new unsigned[nnz]); + unsigned *iA_new (new unsigned[SparseMatrixBase<T>::_n_rows+1]); - iA_new[0] = nnz = 0; + iA_new[0] = nnz = 0; - for (unsigned i = 0; i < SparseMatrixBase<T>::_n_rows; i++) { - const unsigned idx (CRSMatrix<T>::_row_ptr[i+1]); - for (unsigned j = CRSMatrix<T>::_row_ptr[i]; j < idx; j++) { - if (CRSMatrix<T>::_col_idx[j] >= i) { - A_new[nnz] = CRSMatrix<T>::_data[j]; - jA_new[nnz++] = CRSMatrix<T>::_col_idx[j]; - } - } - iA_new[i+1] = nnz; - } + for (unsigned i = 0; i < SparseMatrixBase<T>::_n_rows; i++) { + const unsigned idx (CRSMatrix<T>::_row_ptr[i+1]); + for (unsigned j = CRSMatrix<T>::_row_ptr[i]; j < idx; j++) { + if (CRSMatrix<T>::_col_idx[j] >= i) { + A_new[nnz] = CRSMatrix<T>::_data[j]; + jA_new[nnz++] = CRSMatrix<T>::_col_idx[j]; + } + } + iA_new[i+1] = nnz; + } - std::swap(CRSMatrix<T>::_row_ptr, iA_new); - std::swap(CRSMatrix<T>::_col_idx, jA_new); - std::swap(CRSMatrix<T>::_data, A_new); + std::swap(CRSMatrix<T>::_row_ptr, iA_new); + std::swap(CRSMatrix<T>::_col_idx, jA_new); + std::swap(CRSMatrix<T>::_data, A_new); - delete[] iA_new; - delete[] jA_new; - delete[] A_new; - } + delete[] iA_new; + delete[] jA_new; + delete[] A_new; + } - virtual ~CRSSymMatrix() {} + virtual ~CRSSymMatrix() {} - void amux(T d, T const * const x, T *y) const - { - amuxCRSSym (d, SparseMatrixBase<T>::_n_rows, CRSMatrix<T>::_row_ptr, CRSMatrix<T>::_col_idx, CRSMatrix<T>::_data, x, y); - } + void amux(T d, T const * const x, T *y) const + { + amuxCRSSym (d, SparseMatrixBase<T>::_n_rows, CRSMatrix<T>::_row_ptr, CRSMatrix<T>::_col_idx, CRSMatrix<T>::_data, x, y); + } }; diff --git a/MathLib/LinAlg/Sparse/CRSTools-impl.h b/MathLib/LinAlg/Sparse/CRSTools-impl.h index 5f7a17cb806..5316970ab3b 100644 --- a/MathLib/LinAlg/Sparse/CRSTools-impl.h +++ b/MathLib/LinAlg/Sparse/CRSTools-impl.h @@ -19,51 +19,51 @@ namespace MathLib template <typename VEC_T, typename FP_TYPE> void applyKnownSolution(CRSMatrix<FP_TYPE, typename VEC_T::IndexType>*& mat, - VEC_T &rhs, std::vector<typename VEC_T::IndexType> const& rows, - std::vector<FP_TYPE> const& vals) + VEC_T &rhs, std::vector<typename VEC_T::IndexType> const& rows, + std::vector<FP_TYPE> const& vals) { - std::unique_ptr<MathLib::CRSMatrix<FP_TYPE, typename VEC_T::IndexType>> mat_t(mat->getTranspose()); + std::unique_ptr<MathLib::CRSMatrix<FP_TYPE, typename VEC_T::IndexType>> mat_t(mat->getTranspose()); - // row pointer of the transposed matrix - typename VEC_T::IndexType const*const iAt(mat_t->getRowPtrArray()); - // column indices of the transposed matrix - typename VEC_T::IndexType const*const jAt(mat_t->getColIdxArray()); - // entries of the transposed matrix - double * At(const_cast<double *>(mat_t->getEntryArray())); + // row pointer of the transposed matrix + typename VEC_T::IndexType const*const iAt(mat_t->getRowPtrArray()); + // column indices of the transposed matrix + typename VEC_T::IndexType const*const jAt(mat_t->getColIdxArray()); + // entries of the transposed matrix + double * At(const_cast<double *>(mat_t->getEntryArray())); - // b_i -= A(i,k)*val, i!=k => b_i -= A(k,i)^T * val - // set A^T(k,i) = 0, i!=k (i.e. set column entries of original matrix to - // zero) - for (std::size_t r(0); r<rows.size(); ++r) { - auto const row = rows[r]; - auto const val = vals[r]; - for (typename VEC_T::IndexType j(iAt[row]); j<iAt[row+1]; ++j) { - if (jAt[j] == row) // skip diagonal entry - continue; - rhs.add(jAt[j], -At[j] * val); - At[j] = 0.0; - } - } + // b_i -= A(i,k)*val, i!=k => b_i -= A(k,i)^T * val + // set A^T(k,i) = 0, i!=k (i.e. set column entries of original matrix to + // zero) + for (std::size_t r(0); r<rows.size(); ++r) { + auto const row = rows[r]; + auto const val = vals[r]; + for (typename VEC_T::IndexType j(iAt[row]); j<iAt[row+1]; ++j) { + if (jAt[j] == row) // skip diagonal entry + continue; + rhs.add(jAt[j], -At[j] * val); + At[j] = 0.0; + } + } - delete mat; - mat = mat_t->getTranspose(); - typename VEC_T::IndexType const*const iA(mat->getRowPtrArray()); // row ptrs - typename VEC_T::IndexType const*const jA(mat->getColIdxArray()); // col idx - double * entries(const_cast<double*>(mat->getEntryArray())); + delete mat; + mat = mat_t->getTranspose(); + typename VEC_T::IndexType const*const iA(mat->getRowPtrArray()); // row ptrs + typename VEC_T::IndexType const*const jA(mat->getColIdxArray()); // col idx + double * entries(const_cast<double*>(mat->getEntryArray())); - // set row entries, except the diagonal entry, to zero - for (std::size_t r(0); r<rows.size(); ++r) { - auto const row = rows[r]; - for (typename VEC_T::IndexType j = iA[row]; j < iA[row + 1]; ++j) - { - if (jA[j] == row) { - entries[j] = 1.0; // A(row,row) = 1.0 - // rhs[row] = A(row,row) * vals[r] - rhs.set(row, vals[r]); - } else - entries[j] = 0.0; - } - } + // set row entries, except the diagonal entry, to zero + for (std::size_t r(0); r<rows.size(); ++r) { + auto const row = rows[r]; + for (typename VEC_T::IndexType j = iA[row]; j < iA[row + 1]; ++j) + { + if (jA[j] == row) { + entries[j] = 1.0; // A(row,row) = 1.0 + // rhs[row] = A(row,row) * vals[r] + rhs.set(row, vals[r]); + } else + entries[j] = 0.0; + } + } } } // MathLib diff --git a/MathLib/LinAlg/Sparse/CRSTools.h b/MathLib/LinAlg/Sparse/CRSTools.h index e0bef638550..737e79e720d 100644 --- a/MathLib/LinAlg/Sparse/CRSTools.h +++ b/MathLib/LinAlg/Sparse/CRSTools.h @@ -35,8 +35,8 @@ template<typename IDX_TYPE> class DenseVector; */ template <typename VEC_T, typename FP_TYPE = double> void applyKnownSolution(CRSMatrix<FP_TYPE, typename VEC_T::IndexType>*& mat, - VEC_T &rhs, std::vector<std::size_t> const& rows, - std::vector<FP_TYPE> const& vals); + VEC_T &rhs, std::vector<std::size_t> const& rows, + std::vector<FP_TYPE> const& vals); } // MathLib diff --git a/MathLib/LinAlg/Sparse/CRSTranspose.h b/MathLib/LinAlg/Sparse/CRSTranspose.h index c055ad87490..b7591e620ef 100644 --- a/MathLib/LinAlg/Sparse/CRSTranspose.h +++ b/MathLib/LinAlg/Sparse/CRSTranspose.h @@ -16,7 +16,7 @@ #define CRSTRANSPOSE_H_ void CS_transp(unsigned n, unsigned *iA, unsigned* jA, double* A, - unsigned *iB, unsigned *jB, double* B) + unsigned *iB, unsigned *jB, double* B) { unsigned nnz = iA[n]; unsigned *inz(new unsigned[n]); diff --git a/MathLib/LinAlg/Sparse/MatrixSparsityPattern.cpp b/MathLib/LinAlg/Sparse/MatrixSparsityPattern.cpp index 286acad98df..3ab04e23b67 100644 --- a/MathLib/LinAlg/Sparse/MatrixSparsityPattern.cpp +++ b/MathLib/LinAlg/Sparse/MatrixSparsityPattern.cpp @@ -16,7 +16,7 @@ namespace MathLib { MatrixSparsityPattern::MatrixSparsityPattern(std::size_t const n_rows) : - _pattern(n_rows) + _pattern(n_rows) {} MatrixSparsityPattern::~MatrixSparsityPattern() @@ -24,23 +24,23 @@ MatrixSparsityPattern::~MatrixSparsityPattern() std::size_t MatrixSparsityPattern::getNRows() const { - return _pattern.size(); + return _pattern.size(); } MatrixSparsityPattern::ConstRowIterator MatrixSparsityPattern::getRowBeginIterator( std::size_t const row) const { - return _pattern[row].cbegin(); + return _pattern[row].cbegin(); } MatrixSparsityPattern::ConstRowIterator MatrixSparsityPattern::getRowEndIterator( std::size_t const row) const { - return _pattern[row].cend(); + return _pattern[row].cend(); } void MatrixSparsityPattern::insert(std::size_t const row, std::size_t const col) { - _pattern[row].insert(col); + _pattern[row].insert(col); } } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/MatrixSparsityPattern.h b/MathLib/LinAlg/Sparse/MatrixSparsityPattern.h index 4fe6148bae5..853487f057a 100644 --- a/MathLib/LinAlg/Sparse/MatrixSparsityPattern.h +++ b/MathLib/LinAlg/Sparse/MatrixSparsityPattern.h @@ -30,29 +30,29 @@ namespace MathLib class MatrixSparsityPattern { public: - /// Constant iterator over sorted entries of a row. - typedef std::set<std::size_t>::const_iterator ConstRowIterator; + /// Constant iterator over sorted entries of a row. + typedef std::set<std::size_t>::const_iterator ConstRowIterator; - explicit MatrixSparsityPattern(std::size_t const n_rows); - virtual ~MatrixSparsityPattern(); + explicit MatrixSparsityPattern(std::size_t const n_rows); + virtual ~MatrixSparsityPattern(); - /// Returns number of sparsity pattern rows. - std::size_t getNRows() const; + /// Returns number of sparsity pattern rows. + std::size_t getNRows() const; - /// Constant iterator over sorted entries of a row. - ConstRowIterator getRowBeginIterator(std::size_t const row) const; - /// Constant iterator over sorted entries of a row. - ConstRowIterator getRowEndIterator(std::size_t const row) const; + /// Constant iterator over sorted entries of a row. + ConstRowIterator getRowBeginIterator(std::size_t const row) const; + /// Constant iterator over sorted entries of a row. + ConstRowIterator getRowEndIterator(std::size_t const row) const; - /// Inserts an entry in the sparsity pattern. - /// \param row The row index the entry should be inserted to. The row index must be less or equal to the value returned by getNRows(). - /// \param col The column index. A new entry will be created if needed. - void insert(std::size_t const row, std::size_t const col); + /// Inserts an entry in the sparsity pattern. + /// \param row The row index the entry should be inserted to. The row index must be less or equal to the value returned by getNRows(). + /// \param col The column index. A new entry will be created if needed. + void insert(std::size_t const row, std::size_t const col); private: - DISALLOW_COPY_AND_ASSIGN(MatrixSparsityPattern); + DISALLOW_COPY_AND_ASSIGN(MatrixSparsityPattern); - std::vector<std::set<std::size_t> > _pattern; + std::vector<std::set<std::size_t> > _pattern; }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.cpp index 94d845c69ad..feb93e94c7d 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.cpp +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.cpp @@ -20,49 +20,49 @@ namespace MathLib { AdjMat* AdjMat::getMat(unsigned beg, unsigned end, - const unsigned* const op_perm, const unsigned* const po_perm) const + const unsigned* const op_perm, const unsigned* const po_perm) const { - const unsigned nsize(end - beg); // size of new matrix - unsigned i, c; // row and col idx in permuted matrix - unsigned j, idx; // pointer in jA - unsigned r; // row idx in original matrix - - unsigned *iAn(new unsigned[nsize + 1]); - iAn[0] = 0; - - unsigned *pos(new unsigned[nsize + 1]); - for (i = 0; i <= nsize; i++) - pos[i] = 0; - - for (i = beg; i < end; i++) { - r = op_perm[i]; - idx = _row_ptr[r + 1]; - for (j = _row_ptr[r]; j < idx; j++) { - c = po_perm[_col_idx[j]]; - if (beg <= c && c < end) - ++pos[i - beg]; - } - } - for (i = 0; i < nsize; i++) - iAn[i + 1] = iAn[i] + pos[i]; - for (i = 0; i < nsize; i++) - pos[i] = iAn[i]; - - unsigned *jAn(new unsigned[iAn[nsize]]); - for (i = beg; i < end; i++) { - r = op_perm[i]; - idx = _row_ptr[r + 1]; - for (j = _row_ptr[r]; j < idx; j++) { - c = po_perm[_col_idx[j]]; - if (beg <= c && c < end) - jAn[pos[i - beg]++] = c - beg; - } - } - - delete[] pos; - for (i = 0; i < nsize; ++i) - std::sort(jAn+iAn[i], jAn+iAn[i+1]); - return new AdjMat(nsize, iAn, jAn, nullptr); + const unsigned nsize(end - beg); // size of new matrix + unsigned i, c; // row and col idx in permuted matrix + unsigned j, idx; // pointer in jA + unsigned r; // row idx in original matrix + + unsigned *iAn(new unsigned[nsize + 1]); + iAn[0] = 0; + + unsigned *pos(new unsigned[nsize + 1]); + for (i = 0; i <= nsize; i++) + pos[i] = 0; + + for (i = beg; i < end; i++) { + r = op_perm[i]; + idx = _row_ptr[r + 1]; + for (j = _row_ptr[r]; j < idx; j++) { + c = po_perm[_col_idx[j]]; + if (beg <= c && c < end) + ++pos[i - beg]; + } + } + for (i = 0; i < nsize; i++) + iAn[i + 1] = iAn[i] + pos[i]; + for (i = 0; i < nsize; i++) + pos[i] = iAn[i]; + + unsigned *jAn(new unsigned[iAn[nsize]]); + for (i = beg; i < end; i++) { + r = op_perm[i]; + idx = _row_ptr[r + 1]; + for (j = _row_ptr[r]; j < idx; j++) { + c = po_perm[_col_idx[j]]; + if (beg <= c && c < end) + jAn[pos[i - beg]++] = c - beg; + } + } + + delete[] pos; + for (i = 0; i < nsize; ++i) + std::sort(jAn+iAn[i], jAn+iAn[i+1]); + return new AdjMat(nsize, iAn, jAn, nullptr); } /** @@ -74,138 +74,138 @@ AdjMat* AdjMat::getMat(unsigned beg, unsigned end, static void genAdjMat(unsigned n, unsigned* &iA, unsigned* &jA) { - unsigned i; - // count entries of each row - unsigned* iAn = new unsigned[n + 1]; - for (i = 0; i <= n; ++i) - iAn[i] = 0; - - // go through all strictly lower triangular entries (i,j) and check - // whether (j,i) exists in the upper triangular part - - // set n pointers to the beginning of each row - unsigned* co = new unsigned[n]; - for (i = 0; i < n; ++i) - co[i] = iA[i]; - - for (i = 0; i < n; ++i) - for (unsigned k = iA[i]; k < iA[i + 1]; ++k) { - unsigned j = jA[k]; - if (i < j) - ++iAn[i + 1]; // upper triangular entries count - else { // lower triangular only if there is no counter part - unsigned k1 = iA[j], k2 = iA[j + 1]; - if (i < jA[k1] || i > jA[k2 - 1]) - ++iAn[j + 1]; // i is out of bounds - else { // go through all uninspected entries in the jth row - while (co[j] < k2 && i > jA[co[j]]) - ++co[j]; - if (co[j] == k2 || i < jA[co[j]]) - ++iAn[j + 1]; - } - } - } - - // construct array iAn by summing up the contents of iAn - // con is a set of pointer refering to iAn - unsigned* con = new unsigned[n]; - co[0] = con[0] = 0; - for (i = 1; i < n; ++i) { - co[i] = iA[i]; - con[i] = iAn[i]; - iAn[i + 1] += iAn[i]; - } - - unsigned *jAn = new unsigned[iAn[n]]; - for (i = 1; i < n; ++i) - for (unsigned k = iA[i]; k < iA[i + 1]; ++k) { - unsigned j = jA[k]; - // copy all transposed lower triangular entries and all upper - // triangular elements up to that position - if (j < i) { - while (co[j] < iA[j + 1] && i > jA[co[j]]) { - if (jA[co[j]] > j) - jAn[con[j]++] = jA[co[j]]; - ++co[j]; - } - - if (co[j] == iA[j + 1] || i <= jA[co[j]]) { - jAn[con[j]++] = i; - ++co[i]; - if (i == jA[co[j]]) - ++co[j]; - } - } - } - - // finish rows - for (i = 0; i < n; ++i) - for (unsigned k = co[i]; k < iA[i + 1]; ++k) - if (i < jA[k]) - jAn[con[i]++] = jA[k]; - - std::swap(jA, jAn); - std::swap(iA, iAn); - - delete[] jAn; - delete[] con; - delete[] co; - delete[] iAn; + unsigned i; + // count entries of each row + unsigned* iAn = new unsigned[n + 1]; + for (i = 0; i <= n; ++i) + iAn[i] = 0; + + // go through all strictly lower triangular entries (i,j) and check + // whether (j,i) exists in the upper triangular part + + // set n pointers to the beginning of each row + unsigned* co = new unsigned[n]; + for (i = 0; i < n; ++i) + co[i] = iA[i]; + + for (i = 0; i < n; ++i) + for (unsigned k = iA[i]; k < iA[i + 1]; ++k) { + unsigned j = jA[k]; + if (i < j) + ++iAn[i + 1]; // upper triangular entries count + else { // lower triangular only if there is no counter part + unsigned k1 = iA[j], k2 = iA[j + 1]; + if (i < jA[k1] || i > jA[k2 - 1]) + ++iAn[j + 1]; // i is out of bounds + else { // go through all uninspected entries in the jth row + while (co[j] < k2 && i > jA[co[j]]) + ++co[j]; + if (co[j] == k2 || i < jA[co[j]]) + ++iAn[j + 1]; + } + } + } + + // construct array iAn by summing up the contents of iAn + // con is a set of pointer refering to iAn + unsigned* con = new unsigned[n]; + co[0] = con[0] = 0; + for (i = 1; i < n; ++i) { + co[i] = iA[i]; + con[i] = iAn[i]; + iAn[i + 1] += iAn[i]; + } + + unsigned *jAn = new unsigned[iAn[n]]; + for (i = 1; i < n; ++i) + for (unsigned k = iA[i]; k < iA[i + 1]; ++k) { + unsigned j = jA[k]; + // copy all transposed lower triangular entries and all upper + // triangular elements up to that position + if (j < i) { + while (co[j] < iA[j + 1] && i > jA[co[j]]) { + if (jA[co[j]] > j) + jAn[con[j]++] = jA[co[j]]; + ++co[j]; + } + + if (co[j] == iA[j + 1] || i <= jA[co[j]]) { + jAn[con[j]++] = i; + ++co[i]; + if (i == jA[co[j]]) + ++co[j]; + } + } + } + + // finish rows + for (i = 0; i < n; ++i) + for (unsigned k = co[i]; k < iA[i + 1]; ++k) + if (i < jA[k]) + jAn[con[i]++] = jA[k]; + + std::swap(jA, jAn); + std::swap(iA, iAn); + + delete[] jAn; + delete[] con; + delete[] co; + delete[] iAn; } static void genFullAdjMat(unsigned n, unsigned* &iA, unsigned* &jA) { - unsigned i; - // count entries of each column - unsigned* cnt = new unsigned[n]; - for (i = 0; i < n; ++i) - cnt[i] = 0; - - for (i = 0; i < n; ++i) { - unsigned j = iA[i], idx = iA[i + 1]; - while (j < idx) { - cnt[jA[j]]++; - j++; - } - } - - // summing up entries - for (i = 2; i < n; ++i) - cnt[i] += cnt[i - 1]; - - unsigned* iAn = new unsigned[n + 1]; // VALGRIND meldet hier Fehler - iAn[0] = 0; - for (i = 1; i <= n; ++i) - iAn[i] = iA[i] + cnt[i - 1]; - - unsigned *jAn = new unsigned[iAn[n]]; - for (unsigned k = 0; k < n; k++) - cnt[k] = iAn[k]; - - for (i = 0; i < n; ++i) { - unsigned j = iA[i], idx = iA[i + 1]; - while (j < idx) { - jAn[cnt[i]++] = jA[j]; - jAn[cnt[jA[j]]++] = i; - j++; - } - } - - std::swap(jA, jAn); - std::swap(iA, iAn); - - delete[] jAn; - delete[] iAn; - delete[] cnt; + unsigned i; + // count entries of each column + unsigned* cnt = new unsigned[n]; + for (i = 0; i < n; ++i) + cnt[i] = 0; + + for (i = 0; i < n; ++i) { + unsigned j = iA[i], idx = iA[i + 1]; + while (j < idx) { + cnt[jA[j]]++; + j++; + } + } + + // summing up entries + for (i = 2; i < n; ++i) + cnt[i] += cnt[i - 1]; + + unsigned* iAn = new unsigned[n + 1]; // VALGRIND meldet hier Fehler + iAn[0] = 0; + for (i = 1; i <= n; ++i) + iAn[i] = iA[i] + cnt[i - 1]; + + unsigned *jAn = new unsigned[iAn[n]]; + for (unsigned k = 0; k < n; k++) + cnt[k] = iAn[k]; + + for (i = 0; i < n; ++i) { + unsigned j = iA[i], idx = iA[i + 1]; + while (j < idx) { + jAn[cnt[i]++] = jA[j]; + jAn[cnt[jA[j]]++] = i; + j++; + } + } + + std::swap(jA, jAn); + std::swap(iA, iAn); + + delete[] jAn; + delete[] iAn; + delete[] cnt; } void AdjMat::makeSymmetric() { - // store upper triangular mat values - genAdjMat(this->_n_rows, _row_ptr, _col_idx); - // mirror the upper triangular part into lower - genFullAdjMat(this->_n_rows, _row_ptr, _col_idx); + // store upper triangular mat values + genAdjMat(this->_n_rows, _row_ptr, _col_idx); + // mirror the upper triangular part into lower + genFullAdjMat(this->_n_rows, _row_ptr, _col_idx); } } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h index 14956c4d24b..116154af20f 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/AdjMat.h @@ -26,43 +26,43 @@ namespace MathLib { class AdjMat: public CRSMatrix<unsigned, unsigned> { public: - /** constructor with data elements in A - * @param s size of the quadratic matrix - * @param iA array of length s+1 which holds pointers in jA, - * iA[k] points to the first non-zero column-entry of row k, - * iA[k]-1 points accordingly to the last non-zero column-entry of row k, - * the last entry of iA (iA[s]) takes the number of non zero entries(nnz) - * @param jA array of length nnz, each entry is a colum-index - * @param A data-array of length nnz of type unsigned (weights) - */ - AdjMat(unsigned s, unsigned *iA, unsigned *jA, unsigned *A = NULL) : - CRSMatrix<unsigned, unsigned> (s, iA, jA, A) - {} + /** constructor with data elements in A + * @param s size of the quadratic matrix + * @param iA array of length s+1 which holds pointers in jA, + * iA[k] points to the first non-zero column-entry of row k, + * iA[k]-1 points accordingly to the last non-zero column-entry of row k, + * the last entry of iA (iA[s]) takes the number of non zero entries(nnz) + * @param jA array of length nnz, each entry is a colum-index + * @param A data-array of length nnz of type unsigned (weights) + */ + AdjMat(unsigned s, unsigned *iA, unsigned *jA, unsigned *A = NULL) : + CRSMatrix<unsigned, unsigned> (s, iA, jA, A) + {} - /** - * destructor - */ - virtual ~AdjMat() - {} + /** + * destructor + */ + virtual ~AdjMat() + {} - /** - * - */ - void makeSymmetric(); + /** + * + */ + void makeSymmetric(); - /** getMat returns the (possibly reducible) block [beg,end-1] x [beg,end-1] - * respecting the permutation. - * @param beg index of first row/column, it is supposed that 0 <= beg <= n - * @param end index one after last row/column, it is supposed that beg <= end <= n - * @param op_perm permutation -> original - * @param po_perm original -> permutation - * @return pointer to an AdjMat object - */ - AdjMat* getMat(unsigned beg, unsigned end, unsigned const* const op_perm, - unsigned const* const po_perm) const; + /** getMat returns the (possibly reducible) block [beg,end-1] x [beg,end-1] + * respecting the permutation. + * @param beg index of first row/column, it is supposed that 0 <= beg <= n + * @param end index one after last row/column, it is supposed that beg <= end <= n + * @param op_perm permutation -> original + * @param po_perm original -> permutation + * @return pointer to an AdjMat object + */ + AdjMat* getMat(unsigned beg, unsigned end, unsigned const* const op_perm, + unsigned const* const po_perm) const; private: - DISALLOW_COPY_AND_ASSIGN(AdjMat); + DISALLOW_COPY_AND_ASSIGN(AdjMat); }; } diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.cpp index bce0078ae06..6fcfde26e93 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.cpp +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.cpp @@ -21,11 +21,11 @@ namespace MathLib { CRSMatrixReordered::CRSMatrixReordered(std::string const &fname) : - CRSMatrix<double,unsigned>(fname) + CRSMatrix<double,unsigned>(fname) {} CRSMatrixReordered::CRSMatrixReordered(unsigned n, unsigned *iA, unsigned *jA, double* A) : - CRSMatrix<double, unsigned> (n, iA, jA, A) + CRSMatrix<double, unsigned> (n, iA, jA, A) {} CRSMatrixReordered::~CRSMatrixReordered() @@ -33,47 +33,47 @@ CRSMatrixReordered::~CRSMatrixReordered() void CRSMatrixReordered::reorderMatrix(unsigned const*const op_perm, unsigned const*const po_perm) { - unsigned i; // row and col idx in permuted matrix - unsigned j, idx; // pointer in jA - - const unsigned size(getNRows()); - - unsigned *pos(new unsigned[size + 1]); - for (i = 0; i < size; i++) { - const unsigned original_row(op_perm[i]); - pos[i] = _row_ptr[original_row+1] - _row_ptr[original_row]; - } - pos[size] = 0; - - unsigned *iAn(new unsigned[size + 1]); - iAn[0] = 0; - for (i = 0; i < size; i++) - iAn[i + 1] = iAn[i] + pos[i]; - for (i = 0; i < size; i++) - pos[i] = iAn[i]; - - unsigned *jAn(new unsigned[iAn[size]]); - double *An(new double[iAn[size]]); - for (i = 0; i < size; i++) { - const unsigned original_row(op_perm[i]); - idx = _row_ptr[original_row+1]; - for (j = _row_ptr[original_row]; j < idx; j++) { - jAn[pos[i]] = po_perm[_col_idx[j]]; - An[pos[i]++] = _data[j]; - } - } - - delete[] pos; - for (i = 0; i < size; ++i) - BaseLib::quicksort(jAn, static_cast<std::size_t>(iAn[i]), static_cast<std::size_t>(iAn[i + 1]), An); - - std::swap(iAn, _row_ptr); - std::swap(jAn, _col_idx); - std::swap(An, _data); - - delete [] iAn; - delete [] jAn; - delete [] An; + unsigned i; // row and col idx in permuted matrix + unsigned j, idx; // pointer in jA + + const unsigned size(getNRows()); + + unsigned *pos(new unsigned[size + 1]); + for (i = 0; i < size; i++) { + const unsigned original_row(op_perm[i]); + pos[i] = _row_ptr[original_row+1] - _row_ptr[original_row]; + } + pos[size] = 0; + + unsigned *iAn(new unsigned[size + 1]); + iAn[0] = 0; + for (i = 0; i < size; i++) + iAn[i + 1] = iAn[i] + pos[i]; + for (i = 0; i < size; i++) + pos[i] = iAn[i]; + + unsigned *jAn(new unsigned[iAn[size]]); + double *An(new double[iAn[size]]); + for (i = 0; i < size; i++) { + const unsigned original_row(op_perm[i]); + idx = _row_ptr[original_row+1]; + for (j = _row_ptr[original_row]; j < idx; j++) { + jAn[pos[i]] = po_perm[_col_idx[j]]; + An[pos[i]++] = _data[j]; + } + } + + delete[] pos; + for (i = 0; i < size; ++i) + BaseLib::quicksort(jAn, static_cast<std::size_t>(iAn[i]), static_cast<std::size_t>(iAn[i + 1]), An); + + std::swap(iAn, _row_ptr); + std::swap(jAn, _col_idx); + std::swap(An, _data); + + delete [] iAn; + delete [] jAn; + delete [] An; } } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h index c249e615909..1acf92378be 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReordered.h @@ -24,10 +24,10 @@ namespace MathLib { class CRSMatrixReordered: public MathLib::CRSMatrix<double,unsigned> { public: - CRSMatrixReordered(std::string const &fname); - CRSMatrixReordered(unsigned n, unsigned *iA, unsigned *jA, double* A); - virtual ~CRSMatrixReordered(); - void reorderMatrix(unsigned const*const op_perm, unsigned const*const po_perm); + CRSMatrixReordered(std::string const &fname); + CRSMatrixReordered(unsigned n, unsigned *iA, unsigned *jA, double* A); + virtual ~CRSMatrixReordered(); + void reorderMatrix(unsigned const*const op_perm, unsigned const*const po_perm); }; } diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp index 7a5dcde6800..3c1460603e7 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.cpp @@ -19,7 +19,7 @@ namespace MathLib { CRSMatrixReorderedOpenMP::CRSMatrixReorderedOpenMP(unsigned n, unsigned *iA, unsigned *jA, double* A) : - CRSMatrixReordered(n, iA, jA, A) + CRSMatrixReordered(n, iA, jA, A) { } @@ -28,7 +28,7 @@ CRSMatrixReorderedOpenMP::~CRSMatrixReorderedOpenMP() void CRSMatrixReorderedOpenMP::amux(double d, double const * const x, double *y) const { - amuxCRSParallelOpenMP(d, this->_n_rows, this->_row_ptr, this->_col_idx, this->_data, x, y); + amuxCRSParallelOpenMP(d, this->_n_rows, this->_row_ptr, this->_col_idx, this->_data, x, y); } } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h index 387c0c7ce40..fc0b8653731 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/CRSMatrixReorderedOpenMP.h @@ -22,10 +22,10 @@ namespace MathLib { class CRSMatrixReorderedOpenMP : public CRSMatrixReordered { public: - CRSMatrixReorderedOpenMP(unsigned n, unsigned *iA, unsigned *jA, double* A); - virtual ~CRSMatrixReorderedOpenMP(); + CRSMatrixReorderedOpenMP(unsigned n, unsigned *iA, unsigned *jA, double* A); + virtual ~CRSMatrixReorderedOpenMP(); - virtual void amux(double d, double const * const __restrict__ x, double *__restrict__ y) const; + virtual void amux(double d, double const * const __restrict__ x, double *__restrict__ y) const; }; } diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.cpp index 0fd700066b1..e1148e85785 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.cpp +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.cpp @@ -37,179 +37,179 @@ Cluster::Cluster(ClusterBase* father, unsigned beg, unsigned end, void Cluster::subdivide(unsigned bmin) { - const unsigned size(_end - _beg); - if (size > bmin) { - - idx_t n_rows(static_cast<idx_t>(_l_adj_mat->getNRows())); - - idx_t *xadj(new idx_t[n_rows+1]); - unsigned const*const original_row_ptr(_l_adj_mat->getRowPtrArray()); - for(idx_t k(0); k<=n_rows; k++) { - xadj[k] = original_row_ptr[k]; - } - - unsigned nnz(_l_adj_mat->getNNZ()); - idx_t *adjncy(new idx_t[nnz]); - unsigned const*const original_adjncy(_l_adj_mat->getColIdxArray()); - for(unsigned k(0); k<nnz; k++) { - adjncy[k] = original_adjncy[k]; - } -// unsigned nparts = 2; - idx_t options[METIS_NOPTIONS]; // for METIS - METIS_SetDefaultOptions(options); -// options[METIS OPTION PTYPE] = METIS PTYPE RB; -// options[METIS OPTION OBJTYPE] = METIS OBJTYPE CUT; -// options[METIS OPTION CTYPE] = METIS CTYPE SHEM; -// options[] = ; -// options[] = ; -// options[] = ; - -// unsigned sepsize(0); // for METIS - idx_t *vwgt(new idx_t[n_rows + 1]); -// const unsigned nnz(xadj[n_rows]); -// unsigned *adjwgt(new unsigned[nnz]); - for (idx_t k(0); k < n_rows + 1; k++) - vwgt[k] = 1; -// for (unsigned k(0); k < nnz; k++) -// adjwgt[k] = 1; -// unsigned *part(new unsigned[n_rows + 1]); - - // subdivide the index set into three parts employing METIS -// METIS_ComputeVertexSeparator(&n_rows, xadj, adjncy, vwgt, &options, -// &sepsize, part); - idx_t *loc_op_perm(new idx_t[n_rows]); - idx_t *loc_po_perm(new idx_t[n_rows]); - for (idx_t k(0); k<n_rows; k++) { - loc_op_perm[k] = _g_op_perm[k]; - } - for (idx_t k(0); k<n_rows; k++) { - loc_po_perm[k] = _g_po_perm[k]; - } - METIS_NodeND(&n_rows, xadj, adjncy, vwgt, options, loc_op_perm, loc_po_perm); - for (idx_t k(0); k<n_rows; k++) { - _g_op_perm[k] = loc_op_perm[k]; - } - for (idx_t k(0); k<n_rows; k++) { - _g_po_perm[k] = loc_po_perm[k]; - } - delete [] loc_op_perm; - delete [] loc_po_perm; - delete [] vwgt; - delete [] adjncy; - delete [] xadj; -// // create and init local permutations -// unsigned *l_op_perm(new unsigned[size]); -// unsigned *l_po_perm(new unsigned[size]); -// for (unsigned i = 0; i < size; ++i) -// l_op_perm[i] = l_po_perm[i] = i; + const unsigned size(_end - _beg); + if (size > bmin) { + + idx_t n_rows(static_cast<idx_t>(_l_adj_mat->getNRows())); + + idx_t *xadj(new idx_t[n_rows+1]); + unsigned const*const original_row_ptr(_l_adj_mat->getRowPtrArray()); + for(idx_t k(0); k<=n_rows; k++) { + xadj[k] = original_row_ptr[k]; + } + + unsigned nnz(_l_adj_mat->getNNZ()); + idx_t *adjncy(new idx_t[nnz]); + unsigned const*const original_adjncy(_l_adj_mat->getColIdxArray()); + for(unsigned k(0); k<nnz; k++) { + adjncy[k] = original_adjncy[k]; + } +// unsigned nparts = 2; + idx_t options[METIS_NOPTIONS]; // for METIS + METIS_SetDefaultOptions(options); +// options[METIS OPTION PTYPE] = METIS PTYPE RB; +// options[METIS OPTION OBJTYPE] = METIS OBJTYPE CUT; +// options[METIS OPTION CTYPE] = METIS CTYPE SHEM; +// options[] = ; +// options[] = ; +// options[] = ; + +// unsigned sepsize(0); // for METIS + idx_t *vwgt(new idx_t[n_rows + 1]); +// const unsigned nnz(xadj[n_rows]); +// unsigned *adjwgt(new unsigned[nnz]); + for (idx_t k(0); k < n_rows + 1; k++) + vwgt[k] = 1; +// for (unsigned k(0); k < nnz; k++) +// adjwgt[k] = 1; +// unsigned *part(new unsigned[n_rows + 1]); + + // subdivide the index set into three parts employing METIS +// METIS_ComputeVertexSeparator(&n_rows, xadj, adjncy, vwgt, &options, +// &sepsize, part); + idx_t *loc_op_perm(new idx_t[n_rows]); + idx_t *loc_po_perm(new idx_t[n_rows]); + for (idx_t k(0); k<n_rows; k++) { + loc_op_perm[k] = _g_op_perm[k]; + } + for (idx_t k(0); k<n_rows; k++) { + loc_po_perm[k] = _g_po_perm[k]; + } + METIS_NodeND(&n_rows, xadj, adjncy, vwgt, options, loc_op_perm, loc_po_perm); + for (idx_t k(0); k<n_rows; k++) { + _g_op_perm[k] = loc_op_perm[k]; + } + for (idx_t k(0); k<n_rows; k++) { + _g_po_perm[k] = loc_po_perm[k]; + } + delete [] loc_op_perm; + delete [] loc_po_perm; + delete [] vwgt; + delete [] adjncy; + delete [] xadj; +// // create and init local permutations +// unsigned *l_op_perm(new unsigned[size]); +// unsigned *l_po_perm(new unsigned[size]); +// for (unsigned i = 0; i < size; ++i) +// l_op_perm[i] = l_po_perm[i] = i; // -// unsigned isep1, isep2; -// updatePerm(part, isep1, isep2, l_op_perm, l_po_perm); -// delete[] part; +// unsigned isep1, isep2; +// updatePerm(part, isep1, isep2, l_op_perm, l_po_perm); +// delete[] part; // -// // update global permutation -// unsigned *t_op_perm = new unsigned[size]; -// for (unsigned k = 0; k < size; ++k) -// t_op_perm[k] = _g_op_perm[_beg + l_op_perm[k]]; +// // update global permutation +// unsigned *t_op_perm = new unsigned[size]; +// for (unsigned k = 0; k < size; ++k) +// t_op_perm[k] = _g_op_perm[_beg + l_op_perm[k]]; // -// for (unsigned k = _beg; k < _end; ++k) { -// _g_op_perm[k] = t_op_perm[k - _beg]; -// _g_po_perm[_g_op_perm[k]] = k; -// } -// delete[] t_op_perm; +// for (unsigned k = _beg; k < _end; ++k) { +// _g_op_perm[k] = t_op_perm[k - _beg]; +// _g_po_perm[_g_op_perm[k]] = k; +// } +// delete[] t_op_perm; // -// // next recursion step -// if ((isep1 >= bmin) && (isep2 - isep1 >= bmin)) { -// // construct adj matrices for [0, isep1), [isep1,isep2), [isep2, _end) -// AdjMat *l_adj0(_l_adj_mat->getMat(0, isep1, l_op_perm, l_po_perm)); -// AdjMat *l_adj1(_l_adj_mat->getMat(isep1, isep2, l_op_perm, l_po_perm)); -// AdjMat *l_adj2(_l_adj_mat->getMat(isep2, size, l_op_perm, l_po_perm)); +// // next recursion step +// if ((isep1 >= bmin) && (isep2 - isep1 >= bmin)) { +// // construct adj matrices for [0, isep1), [isep1,isep2), [isep2, _end) +// AdjMat *l_adj0(_l_adj_mat->getMat(0, isep1, l_op_perm, l_po_perm)); +// AdjMat *l_adj1(_l_adj_mat->getMat(isep1, isep2, l_op_perm, l_po_perm)); +// AdjMat *l_adj2(_l_adj_mat->getMat(isep2, size, l_op_perm, l_po_perm)); // -// delete[] l_op_perm; -// delete[] l_po_perm; -// delete _l_adj_mat; -// _l_adj_mat = NULL; +// delete[] l_op_perm; +// delete[] l_po_perm; +// delete _l_adj_mat; +// _l_adj_mat = NULL; // -// _n_sons = 3; -// _sons = new ClusterBase*[_n_sons]; +// _n_sons = 3; +// _sons = new ClusterBase*[_n_sons]; // -// isep1 += _beg; -// isep2 += _beg; +// isep1 += _beg; +// isep2 += _beg; // -// // constructing child nodes for index cluster tree -// _sons[0] = new Cluster(this, _beg, isep1, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj0); -// _sons[1] = new Cluster(this, isep1, isep2, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj1); -// _sons[2] = new Separator(this, isep2, _end, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj2); +// // constructing child nodes for index cluster tree +// _sons[0] = new Cluster(this, _beg, isep1, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj0); +// _sons[1] = new Cluster(this, isep1, isep2, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj1); +// _sons[2] = new Separator(this, isep2, _end, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj2); // -// dynamic_cast<Cluster*>(_sons[0])->subdivide(bmin); -// dynamic_cast<Cluster*>(_sons[1])->subdivide(bmin); +// dynamic_cast<Cluster*>(_sons[0])->subdivide(bmin); +// dynamic_cast<Cluster*>(_sons[1])->subdivide(bmin); // -// } else { -// delete _l_adj_mat; -// _l_adj_mat = NULL; -// } // end if next recursion step - } // end if ( connected && size () > bmin ) +// } else { +// delete _l_adj_mat; +// _l_adj_mat = NULL; +// } // end if next recursion step + } // end if ( connected && size () > bmin ) } void Cluster::updatePerm(unsigned* reordering, unsigned &isep0, - unsigned &isep1, unsigned* l_op_perm, unsigned* l_po_perm) + unsigned &isep1, unsigned* l_op_perm, unsigned* l_po_perm) { - unsigned beg = 0, end = _end - _beg; - while (beg < end) { - if (reordering[beg] >= 1) { - --end; - while (beg < end && reordering[end] >= 1) - --end; - // local permutation - std::swap(l_op_perm[beg], l_op_perm[end]); - std::swap(l_po_perm[l_op_perm[beg]], l_po_perm[l_op_perm[end]]); - std::swap(reordering[beg], reordering[end]); - } - ++beg; - } - if (beg > end) - isep0 = beg - 1; - else - isep0 = end; - - beg = isep0, end = _end - _beg; - while (beg < end) { - if (reordering[beg] == 2) { - --end; - while (beg < end && reordering[end] == 2) - --end; - // local permutation - std::swap(l_op_perm[beg], l_op_perm[end]); - std::swap(l_po_perm[l_op_perm[beg]], l_po_perm[l_op_perm[end]]); - std::swap(reordering[beg], reordering[end]); - } - ++beg; - } - if (beg > end) - isep1 = beg - 1; - else - isep1 = end; + unsigned beg = 0, end = _end - _beg; + while (beg < end) { + if (reordering[beg] >= 1) { + --end; + while (beg < end && reordering[end] >= 1) + --end; + // local permutation + std::swap(l_op_perm[beg], l_op_perm[end]); + std::swap(l_po_perm[l_op_perm[beg]], l_po_perm[l_op_perm[end]]); + std::swap(reordering[beg], reordering[end]); + } + ++beg; + } + if (beg > end) + isep0 = beg - 1; + else + isep0 = end; + + beg = isep0, end = _end - _beg; + while (beg < end) { + if (reordering[beg] == 2) { + --end; + while (beg < end && reordering[end] == 2) + --end; + // local permutation + std::swap(l_op_perm[beg], l_op_perm[end]); + std::swap(l_po_perm[l_op_perm[beg]], l_po_perm[l_op_perm[end]]); + std::swap(reordering[beg], reordering[end]); + } + ++beg; + } + if (beg > end) + isep1 = beg - 1; + else + isep1 = end; } void Cluster::createClusterTree(unsigned* op_perm, unsigned* po_perm, - unsigned bmin) + unsigned bmin) { - _g_op_perm = op_perm; - _g_po_perm = po_perm; - - // *** 1 create local problem - unsigned n = _g_adj_mat->getNRows(); - unsigned *l_op_perm = new unsigned[n]; - unsigned *l_po_perm = new unsigned[n]; - for (unsigned k = 0; k < n; ++k) - l_op_perm[k] = l_po_perm[k] = k; - _l_adj_mat = _l_adj_mat->getMat(0, n, l_op_perm, l_po_perm); - - // *** 2 create cluster tree - subdivide(bmin); + _g_op_perm = op_perm; + _g_po_perm = po_perm; + + // *** 1 create local problem + unsigned n = _g_adj_mat->getNRows(); + unsigned *l_op_perm = new unsigned[n]; + unsigned *l_po_perm = new unsigned[n]; + for (unsigned k = 0; k < n; ++k) + l_op_perm[k] = l_po_perm[k] = k; + _l_adj_mat = _l_adj_mat->getMat(0, n, l_op_perm, l_po_perm); + + // *** 2 create cluster tree + subdivide(bmin); } } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.h index 1d3ff9da787..9a2be5da64b 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.h +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Cluster.h @@ -26,58 +26,58 @@ namespace MathLib { class Cluster: public ClusterBase { public: - /** - * Constructor creates the root of the cluster tree - * @param n - * @param jA - * @param iA - */ - Cluster(unsigned n, unsigned* iA, unsigned* jA); + /** + * Constructor creates the root of the cluster tree + * @param n + * @param jA + * @param iA + */ + Cluster(unsigned n, unsigned* iA, unsigned* jA); - virtual void subdivide(unsigned bmin); + virtual void subdivide(unsigned bmin); - /** Method returns the status of this ClusterBase object. In this case - * instances of this class are "normal" Clusters. - * @return false - */ - virtual bool isSeparator() const - { - return false; - } + /** Method returns the status of this ClusterBase object. In this case + * instances of this class are "normal" Clusters. + * @return false + */ + virtual bool isSeparator() const + { + return false; + } - /** Destructor. */ - virtual ~Cluster() {} + /** Destructor. */ + virtual ~Cluster() {} - /** - * Method creates recursively the cluster tree, i.e. changes the permutation - * op_perm and po_perm and create child cluster trees. For this task only the - * adjacency matrix is used. - * @param op_perm permutation: original_idx = op_perm[permutated_idx] - * @param po_perm reverse permutation: permutated_idx = po_perm[original_idx] - * @param bmin threshold value for stopping further refinement - * @return a cluster tree - */ - virtual void createClusterTree(unsigned* op_perm, unsigned* po_perm, - unsigned bmin = 50); + /** + * Method creates recursively the cluster tree, i.e. changes the permutation + * op_perm and po_perm and create child cluster trees. For this task only the + * adjacency matrix is used. + * @param op_perm permutation: original_idx = op_perm[permutated_idx] + * @param po_perm reverse permutation: permutated_idx = po_perm[original_idx] + * @param bmin threshold value for stopping further refinement + * @return a cluster tree + */ + virtual void createClusterTree(unsigned* op_perm, unsigned* po_perm, + unsigned bmin = 50); protected: - /** \brief Constructor - \param father parent node in cluster tree - \param beg beginning index of the cluster - \param end beginning index of the next cluster - \param op_perm permutation - \param po_perm permutation - \param global_mat reference to adjacency matrix of the matrix graph in - crs format - \param local_mat pointer to the local adjacency matrix of the matrix - graph in crs format - */ - Cluster(ClusterBase* father, unsigned beg, unsigned end, unsigned* op_perm, - unsigned* po_perm, AdjMat* global_mat, AdjMat* local_mat); + /** \brief Constructor + \param father parent node in cluster tree + \param beg beginning index of the cluster + \param end beginning index of the next cluster + \param op_perm permutation + \param po_perm permutation + \param global_mat reference to adjacency matrix of the matrix graph in + crs format + \param local_mat pointer to the local adjacency matrix of the matrix + graph in crs format + */ + Cluster(ClusterBase* father, unsigned beg, unsigned end, unsigned* op_perm, + unsigned* po_perm, AdjMat* global_mat, AdjMat* local_mat); private: - /** update perm */ - void updatePerm(unsigned* reordering, unsigned &isep0, unsigned &isep1, unsigned* l_op_perm, unsigned* l_po_perm); + /** update perm */ + void updatePerm(unsigned* reordering, unsigned &isep0, unsigned &isep1, unsigned* l_op_perm, unsigned* l_po_perm); }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.cpp index d4072f4f3d3..c671588c389 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.cpp +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.cpp @@ -19,54 +19,54 @@ namespace MathLib { ClusterBase::ClusterBase(unsigned n, unsigned const* const iA, - unsigned const*const jA) : - _beg(0), _end(n), _n_sons(0), _sons(NULL), _parent(NULL), _g_op_perm(NULL), - _g_po_perm(NULL), _g_adj_mat(NULL), _l_adj_mat(NULL) + unsigned const*const jA) : + _beg(0), _end(n), _n_sons(0), _sons(NULL), _parent(NULL), _g_op_perm(NULL), + _g_po_perm(NULL), _g_adj_mat(NULL), _l_adj_mat(NULL) { - const unsigned nnz = iA[n]; + const unsigned nnz = iA[n]; - // create adjacency matrix - unsigned *row_ptr = new unsigned[n + 1]; - for (unsigned k = 0; k <= n; ++k) - row_ptr[k] = iA[k]; - unsigned *col_idx = new unsigned[nnz]; - for (unsigned k = 0; k < nnz; ++k) - col_idx[k] = jA[k]; + // create adjacency matrix + unsigned *row_ptr = new unsigned[n + 1]; + for (unsigned k = 0; k <= n; ++k) + row_ptr[k] = iA[k]; + unsigned *col_idx = new unsigned[nnz]; + for (unsigned k = 0; k < nnz; ++k) + col_idx[k] = jA[k]; - _l_adj_mat = new AdjMat(n, row_ptr, col_idx); - _l_adj_mat->makeSymmetric(); + _l_adj_mat = new AdjMat(n, row_ptr, col_idx); + _l_adj_mat->makeSymmetric(); - // make a copy of the local row_ptr array - unsigned const* l_row_ptr(_l_adj_mat->getRowPtrArray()); - unsigned *g_row_ptr(new unsigned[n + 1]); - for (unsigned k = 0; k <= n; ++k) - g_row_ptr[k] = l_row_ptr[k]; - // make a copy of the local col_idx array - unsigned const* l_col_idx(_l_adj_mat->getColIdxArray()); - const unsigned g_nnz(g_row_ptr[n]); - unsigned *g_col_idx(new unsigned[g_nnz]); - for (unsigned k = 0; k < g_nnz; ++k) - g_col_idx[k] = l_col_idx[k]; - // generate global matrix from local matrix - // (only in the root of cluster tree) - _g_adj_mat = new AdjMat(n, g_row_ptr, g_col_idx); + // make a copy of the local row_ptr array + unsigned const* l_row_ptr(_l_adj_mat->getRowPtrArray()); + unsigned *g_row_ptr(new unsigned[n + 1]); + for (unsigned k = 0; k <= n; ++k) + g_row_ptr[k] = l_row_ptr[k]; + // make a copy of the local col_idx array + unsigned const* l_col_idx(_l_adj_mat->getColIdxArray()); + const unsigned g_nnz(g_row_ptr[n]); + unsigned *g_col_idx(new unsigned[g_nnz]); + for (unsigned k = 0; k < g_nnz; ++k) + g_col_idx[k] = l_col_idx[k]; + // generate global matrix from local matrix + // (only in the root of cluster tree) + _g_adj_mat = new AdjMat(n, g_row_ptr, g_col_idx); } ClusterBase::ClusterBase(ClusterBase *father, unsigned beg, unsigned end, - unsigned* op_perm, unsigned* po_perm, AdjMat* global_mat, AdjMat* local_mat) : - _beg(beg), _end(end), _n_sons(0), _sons(NULL), _parent(father), - _g_op_perm(op_perm), _g_po_perm(po_perm), _g_adj_mat(global_mat), - _l_adj_mat(local_mat) + unsigned* op_perm, unsigned* po_perm, AdjMat* global_mat, AdjMat* local_mat) : + _beg(beg), _end(end), _n_sons(0), _sons(NULL), _parent(father), + _g_op_perm(op_perm), _g_po_perm(po_perm), _g_adj_mat(global_mat), + _l_adj_mat(local_mat) { } ClusterBase::~ClusterBase() { - if (_parent == NULL) - delete _g_adj_mat; - delete _l_adj_mat; + if (_parent == NULL) + delete _g_adj_mat; + delete _l_adj_mat; } } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.h index d93f4cfcdb7..7ed1c36943f 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.h +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/ClusterBase.h @@ -25,86 +25,86 @@ class AdjMat; class ClusterBase { public: - /** - * Constructor creates the root of the cluster tree - * @param n number of rows/columns - * @param iA row pointer array - * @param jA column index array - */ - ClusterBase(unsigned n, unsigned const*const iA, unsigned const*const jA); - /*! - \brief Constructor - \param father parent node in cluster tree - \param beg beginning index of the cluster - \param end beginning index of the next cluster - \param op_perm global permutation array (original_idx = op_perm[permuted_idx]) - \param po_perm global permutation array (permuted_idx = po_perm[original_idx]) - \param global_mat reference to the global adjacency matrix of the matrix graph in crs format - \param local_mat pointer to the local adjacency matrix of the matrix graph in crs format - */ - ClusterBase(ClusterBase* father, unsigned beg, unsigned end, - unsigned* op_perm, unsigned* po_perm, AdjMat* global_mat, AdjMat* local_mat); + /** + * Constructor creates the root of the cluster tree + * @param n number of rows/columns + * @param iA row pointer array + * @param jA column index array + */ + ClusterBase(unsigned n, unsigned const*const iA, unsigned const*const jA); + /*! + \brief Constructor + \param father parent node in cluster tree + \param beg beginning index of the cluster + \param end beginning index of the next cluster + \param op_perm global permutation array (original_idx = op_perm[permuted_idx]) + \param po_perm global permutation array (permuted_idx = po_perm[original_idx]) + \param global_mat reference to the global adjacency matrix of the matrix graph in crs format + \param local_mat pointer to the local adjacency matrix of the matrix graph in crs format + */ + ClusterBase(ClusterBase* father, unsigned beg, unsigned end, + unsigned* op_perm, unsigned* po_perm, AdjMat* global_mat, AdjMat* local_mat); - /** \brief Destructor. - * Destructor frees all form the objects allocated memory. - * */ - virtual ~ClusterBase(); + /** \brief Destructor. + * Destructor frees all form the objects allocated memory. + * */ + virtual ~ClusterBase(); - virtual bool isSeparator() const = 0; + virtual bool isSeparator() const = 0; #ifndef NDEBUG - AdjMat const* getGlobalAdjMat() const { return _g_adj_mat; } + AdjMat const* getGlobalAdjMat() const { return _g_adj_mat; } #endif protected: - /** \brief Method returns the pointer to the parent cluster. - \returns parent cluster */ - ClusterBase* getParent() const - { - return _parent; - } - /** - * beginning index in the global permutation arrays - */ - const unsigned _beg; + /** \brief Method returns the pointer to the parent cluster. + \returns parent cluster */ + ClusterBase* getParent() const + { + return _parent; + } + /** + * beginning index in the global permutation arrays + */ + const unsigned _beg; - /** - * beginning index of next next cluster in the global permutation arrays - */ - const unsigned _end; + /** + * beginning index of next next cluster in the global permutation arrays + */ + const unsigned _end; - /** - * number of sons, _n_sons==0 iff this is a leaf - */ - unsigned _n_sons; + /** + * number of sons, _n_sons==0 iff this is a leaf + */ + unsigned _n_sons; - /** - * the array of sons of this cluster in the cluster tree - */ - ClusterBase** _sons; + /** + * the array of sons of this cluster in the cluster tree + */ + ClusterBase** _sons; - /** - * pointer to parent - */ - ClusterBase *_parent; - /** - * pointer global permutation array (original_idx = op_perm[permuted_idx]) - */ - unsigned* _g_op_perm; - /** - * global permutation: permutation <- po_perm <- original - */ - unsigned* _g_po_perm; - /** - * pointer to global adjacency matrix - * The attribute _g_adj_mat stores the set of edges of the matrix graph $G = (V,E)$. - * (see class AdjMat) - */ - AdjMat* _g_adj_mat; - /** - * local adjacency matrix - */ - AdjMat* _l_adj_mat; + /** + * pointer to parent + */ + ClusterBase *_parent; + /** + * pointer global permutation array (original_idx = op_perm[permuted_idx]) + */ + unsigned* _g_op_perm; + /** + * global permutation: permutation <- po_perm <- original + */ + unsigned* _g_po_perm; + /** + * pointer to global adjacency matrix + * The attribute _g_adj_mat stores the set of edges of the matrix graph $G = (V,E)$. + * (see class AdjMat) + */ + AdjMat* _g_adj_mat; + /** + * local adjacency matrix + */ + AdjMat* _l_adj_mat; }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Separator.cpp b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Separator.cpp index b601040bfe9..640bdf8a66a 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Separator.cpp +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Separator.cpp @@ -20,9 +20,9 @@ namespace MathLib { extern "C" void METIS_PartGraphRecursive(unsigned*, unsigned*, unsigned*, - unsigned*, unsigned*, unsigned*, - unsigned*, unsigned*, unsigned*, - unsigned*, unsigned*); + unsigned*, unsigned*, unsigned*, + unsigned*, unsigned*, unsigned*, + unsigned*, unsigned*); Separator::Separator(ClusterBase* father, unsigned beg, unsigned end, unsigned* op_perm, unsigned* po_perm, diff --git a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Separator.h b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Separator.h index 86c0546f20a..39957e8582f 100644 --- a/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Separator.h +++ b/MathLib/LinAlg/Sparse/NestedDissectionPermutation/Separator.h @@ -28,35 +28,35 @@ class AdjMat; class Separator: public ClusterBase { public: - /** brief Constructor builds a initial object for clustering - \param father pointer to the father node in cluster tree - \param beg index in op_perm and po_perm which describes the begin of the index set of the Separator - \param end index in op_perm and po_perm which describes the begin of the index set of the next - ClusterBase - \param op_perm permutation - \param po_perm inverse permutation - \param global_mat reference to adjacency matrix of the matrix graph in crs format - \param local_mat reference to the local adjacency matrix of the matrix graph in crs format - */ - Separator(ClusterBase* father, unsigned beg, unsigned end, - unsigned* op_perm, unsigned* po_perm, AdjMat* global_mat, - AdjMat* local_mat); + /** brief Constructor builds a initial object for clustering + \param father pointer to the father node in cluster tree + \param beg index in op_perm and po_perm which describes the begin of the index set of the Separator + \param end index in op_perm and po_perm which describes the begin of the index set of the next + ClusterBase + \param op_perm permutation + \param po_perm inverse permutation + \param global_mat reference to adjacency matrix of the matrix graph in crs format + \param local_mat reference to the local adjacency matrix of the matrix graph in crs format + */ + Separator(ClusterBase* father, unsigned beg, unsigned end, + unsigned* op_perm, unsigned* po_perm, AdjMat* global_mat, + AdjMat* local_mat); - /** Destructor. */ - virtual ~Separator(); + /** Destructor. */ + virtual ~Separator(); - /** Method returns the status of this ClusterAlg object. Instances - of this class are Separators. - \returns true - */ - virtual bool isSeparator() const - { - return true; - } + /** Method returns the status of this ClusterAlg object. Instances + of this class are Separators. + \returns true + */ + virtual bool isSeparator() const + { + return true; + } private: - /** update perm */ - unsigned updatePerm(unsigned *reordering, unsigned* l_op_perm, unsigned* l_po_perm); + /** update perm */ + unsigned updatePerm(unsigned *reordering, unsigned* l_op_perm, unsigned* l_po_perm); }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/SparseMatrixBase.h b/MathLib/LinAlg/Sparse/SparseMatrixBase.h index 579d1ad2082..69d3c3a432a 100644 --- a/MathLib/LinAlg/Sparse/SparseMatrixBase.h +++ b/MathLib/LinAlg/Sparse/SparseMatrixBase.h @@ -6,41 +6,41 @@ namespace MathLib { template<typename FP_TYPE, typename IDX_TYPE> class SparseMatrixBase { public: - SparseMatrixBase(IDX_TYPE n1, IDX_TYPE n2) : - _n_rows(n1), _n_cols(n2) - {} - SparseMatrixBase() : - _n_rows(static_cast<IDX_TYPE>(0)), _n_cols(static_cast<IDX_TYPE>(0)) - {} - /** - * y = d * A * x - * @param d scalar factor - * @param x vector to multiply with - * @param y result vector - */ - virtual void amux(FP_TYPE const d, FP_TYPE const* const __restrict__ x, - FP_TYPE* __restrict__ y) const = 0; - virtual ~SparseMatrixBase() {} - /** - * get the number of rows - * @return the number of rows - */ - IDX_TYPE getNRows () const { return _n_rows; } - /** - * get the number of columns - * @return the number of columns - */ - IDX_TYPE getNCols () const { return _n_cols; } + SparseMatrixBase(IDX_TYPE n1, IDX_TYPE n2) : + _n_rows(n1), _n_cols(n2) + {} + SparseMatrixBase() : + _n_rows(static_cast<IDX_TYPE>(0)), _n_cols(static_cast<IDX_TYPE>(0)) + {} + /** + * y = d * A * x + * @param d scalar factor + * @param x vector to multiply with + * @param y result vector + */ + virtual void amux(FP_TYPE const d, FP_TYPE const* const __restrict__ x, + FP_TYPE* __restrict__ y) const = 0; + virtual ~SparseMatrixBase() {} + /** + * get the number of rows + * @return the number of rows + */ + IDX_TYPE getNRows () const { return _n_rows; } + /** + * get the number of columns + * @return the number of columns + */ + IDX_TYPE getNCols () const { return _n_cols; } protected: - /** - * the number of rows - */ - IDX_TYPE _n_rows; - /** - * the number of columns - */ - IDX_TYPE _n_cols; + /** + * the number of rows + */ + IDX_TYPE _n_rows; + /** + * the number of columns + */ + IDX_TYPE _n_cols; }; } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/amuxCRS.cpp b/MathLib/LinAlg/Sparse/amuxCRS.cpp index e7975d11e2c..702f9e6fc73 100644 --- a/MathLib/LinAlg/Sparse/amuxCRS.cpp +++ b/MathLib/LinAlg/Sparse/amuxCRS.cpp @@ -25,153 +25,153 @@ namespace MathLib { struct MatMultThreadParam { - MatMultThreadParam (double scalar_factor, unsigned beg_row, unsigned end_row, - unsigned const * const iA, unsigned const * const jA, - double const * const A, double const * const x, double* y) : - _a (scalar_factor), _beg_row(beg_row), _end_row(end_row), - _row_ptr(iA), _col_idx(jA), _data(A), _x(x), _y(y) - {} - - double _a; - unsigned _beg_row; - unsigned _end_row; - unsigned const * const _row_ptr; - unsigned const * const _col_idx; - double const * const _data; - double const * const _x; - double * _y; + MatMultThreadParam (double scalar_factor, unsigned beg_row, unsigned end_row, + unsigned const * const iA, unsigned const * const jA, + double const * const A, double const * const x, double* y) : + _a (scalar_factor), _beg_row(beg_row), _end_row(end_row), + _row_ptr(iA), _col_idx(jA), _data(A), _x(x), _y(y) + {} + + double _a; + unsigned _beg_row; + unsigned _end_row; + unsigned const * const _row_ptr; + unsigned const * const _col_idx; + double const * const _data; + double const * const _x; + double * _y; }; extern "C" { void* amuxCRSpthread (void* ptr) { - MatMultThreadParam *thread_param(static_cast<MatMultThreadParam*>(ptr)); - const double a(thread_param->_a); - const unsigned beg_row(thread_param->_beg_row); - const unsigned end_row(thread_param->_end_row); - unsigned const * const iA(thread_param->_row_ptr); - unsigned const * const jA(thread_param->_col_idx); - double const * const A(thread_param->_data); - double const * const x(thread_param->_x); - double* y(thread_param->_y); - - for (unsigned i(beg_row); i<end_row; i++) { - y[i] = A[iA[i]] * x[jA[iA[i]]]; - const unsigned end (iA[i+1]); - for (unsigned j(iA[i]+1); j<end; j++) { - y[i] += A[j] * x[jA[j]]; - } - y[i] *= a; - } - return NULL; + MatMultThreadParam *thread_param(static_cast<MatMultThreadParam*>(ptr)); + const double a(thread_param->_a); + const unsigned beg_row(thread_param->_beg_row); + const unsigned end_row(thread_param->_end_row); + unsigned const * const iA(thread_param->_row_ptr); + unsigned const * const jA(thread_param->_col_idx); + double const * const A(thread_param->_data); + double const * const x(thread_param->_x); + double* y(thread_param->_y); + + for (unsigned i(beg_row); i<end_row; i++) { + y[i] = A[iA[i]] * x[jA[iA[i]]]; + const unsigned end (iA[i+1]); + for (unsigned j(iA[i]+1); j<end; j++) { + y[i] += A[j] * x[jA[j]]; + } + y[i] *= a; + } + return NULL; } } // end extern "C" void amuxCRSParallelPThreads (double a, - unsigned n, unsigned const * const iA, unsigned const * const jA, - double const * const A, double const * const x, double* y, - unsigned num_of_pthreads) + unsigned n, unsigned const * const iA, unsigned const * const jA, + double const * const A, double const * const x, double* y, + unsigned num_of_pthreads) { #ifdef HAVE_PTHREADS - // fill thread data objects - MatMultThreadParam** thread_param_array (new MatMultThreadParam*[num_of_pthreads]); - double step_size (static_cast<double>(n)/(num_of_pthreads)); - for (unsigned k(0); k<num_of_pthreads; k++) { - const unsigned beg (static_cast<unsigned>(k*step_size)); - const unsigned end (static_cast<unsigned>((k+1)*step_size)); - thread_param_array[k] = new MatMultThreadParam (a, beg, end, iA, jA, A, x, y); - } - - // allocate thread_array and return value array - pthread_t *thread_array (new pthread_t[num_of_pthreads]); - int *ret_vals (new int[num_of_pthreads]); - - // create threads - for (unsigned k(0); k<num_of_pthreads; k++) { - ret_vals[k] = pthread_create( &(thread_array[k]), NULL, amuxCRSpthread, thread_param_array[k]); - } - - // join threads - for (unsigned k(0); k<num_of_pthreads; k++) { - pthread_join (thread_array[k], NULL); - } - - delete [] ret_vals; - for (unsigned k(0); k<num_of_pthreads; k++) - delete thread_param_array[k]; - delete [] thread_param_array; - delete [] thread_array; + // fill thread data objects + MatMultThreadParam** thread_param_array (new MatMultThreadParam*[num_of_pthreads]); + double step_size (static_cast<double>(n)/(num_of_pthreads)); + for (unsigned k(0); k<num_of_pthreads; k++) { + const unsigned beg (static_cast<unsigned>(k*step_size)); + const unsigned end (static_cast<unsigned>((k+1)*step_size)); + thread_param_array[k] = new MatMultThreadParam (a, beg, end, iA, jA, A, x, y); + } + + // allocate thread_array and return value array + pthread_t *thread_array (new pthread_t[num_of_pthreads]); + int *ret_vals (new int[num_of_pthreads]); + + // create threads + for (unsigned k(0); k<num_of_pthreads; k++) { + ret_vals[k] = pthread_create( &(thread_array[k]), NULL, amuxCRSpthread, thread_param_array[k]); + } + + // join threads + for (unsigned k(0); k<num_of_pthreads; k++) { + pthread_join (thread_array[k], NULL); + } + + delete [] ret_vals; + for (unsigned k(0); k<num_of_pthreads; k++) + delete thread_param_array[k]; + delete [] thread_param_array; + delete [] thread_array; #else - (void)num_of_pthreads; - amuxCRS (a, n, iA, jA, A, x, y); + (void)num_of_pthreads; + amuxCRS (a, n, iA, jA, A, x, y); #endif } void amuxCRSParallelPThreads (double a, - unsigned n, unsigned const * const iA, unsigned const * const jA, - double const * const A, double const * const x, double* y, - unsigned num_of_pthreads, unsigned const*const workload_intervals) + unsigned n, unsigned const * const iA, unsigned const * const jA, + double const * const A, double const * const x, double* y, + unsigned num_of_pthreads, unsigned const*const workload_intervals) { (void) n; // Unused if HAVE_PTHREADS is not defined. #ifdef HAVE_PTHREADS - // fill thread data objects - MatMultThreadParam** thread_param_array (new MatMultThreadParam*[num_of_pthreads]); - for (unsigned k(0); k<num_of_pthreads; k++) { - thread_param_array[k] = new MatMultThreadParam (a, workload_intervals[k], workload_intervals[k+1], iA, jA, A, x, y); - } - - // allocate thread_array and return value array - pthread_t *thread_array (new pthread_t[num_of_pthreads]); - int *ret_vals (new int[num_of_pthreads]); - - // create threads - for (unsigned k(0); k<num_of_pthreads; k++) { - ret_vals[k] = pthread_create( &(thread_array[k]), NULL, amuxCRSpthread, thread_param_array[k]); - } - - // join threads - for (unsigned k(0); k<num_of_pthreads; k++) { - pthread_join (thread_array[k], NULL); - } - - delete [] ret_vals; - for (unsigned k(0); k<num_of_pthreads; k++) - delete thread_param_array[k]; - delete [] thread_param_array; - delete [] thread_array; + // fill thread data objects + MatMultThreadParam** thread_param_array (new MatMultThreadParam*[num_of_pthreads]); + for (unsigned k(0); k<num_of_pthreads; k++) { + thread_param_array[k] = new MatMultThreadParam (a, workload_intervals[k], workload_intervals[k+1], iA, jA, A, x, y); + } + + // allocate thread_array and return value array + pthread_t *thread_array (new pthread_t[num_of_pthreads]); + int *ret_vals (new int[num_of_pthreads]); + + // create threads + for (unsigned k(0); k<num_of_pthreads; k++) { + ret_vals[k] = pthread_create( &(thread_array[k]), NULL, amuxCRSpthread, thread_param_array[k]); + } + + // join threads + for (unsigned k(0); k<num_of_pthreads; k++) { + pthread_join (thread_array[k], NULL); + } + + delete [] ret_vals; + for (unsigned k(0); k<num_of_pthreads; k++) + delete thread_param_array[k]; + delete [] thread_param_array; + delete [] thread_array; #else - (void)num_of_pthreads; - amuxCRS (a, n, iA, jA, A, x, y); + (void)num_of_pthreads; + amuxCRS (a, n, iA, jA, A, x, y); #endif } void amuxCRSSym (double a, - unsigned n, unsigned const * const iA, unsigned const * const jA, + unsigned n, unsigned const * const iA, unsigned const * const jA, double const * const A, double const * const x, double* y) { - for (unsigned i(0); i<n; i++) { - y[i] = 0.0; - } - - for (unsigned i(0); i<n; i++) { - unsigned j (iA[i]); - // handle diagonal - if (jA[j] == i) { - y[i] += A[j] * x[jA[j]]; - j++; - } - const unsigned end (iA[i+1]); - for (; j<end; j++) { - y[i] += A[j] * x[jA[j]]; - y[jA[j]] += A[j] * x[i]; - } - } - - for (unsigned i(0); i<n; i++) { - y[i] *= a; - } + for (unsigned i(0); i<n; i++) { + y[i] = 0.0; + } + + for (unsigned i(0); i<n; i++) { + unsigned j (iA[i]); + // handle diagonal + if (jA[j] == i) { + y[i] += A[j] * x[jA[j]]; + j++; + } + const unsigned end (iA[i+1]); + for (; j<end; j++) { + y[i] += A[j] * x[jA[j]]; + y[jA[j]] += A[j] * x[i]; + } + } + + for (unsigned i(0); i<n; i++) { + y[i] *= a; + } } } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/amuxCRS.h b/MathLib/LinAlg/Sparse/amuxCRS.h index bae40234834..94c26d308e9 100644 --- a/MathLib/LinAlg/Sparse/amuxCRS.h +++ b/MathLib/LinAlg/Sparse/amuxCRS.h @@ -19,27 +19,27 @@ namespace MathLib { template<typename FP_TYPE, typename IDX_TYPE> void amuxCRS(FP_TYPE a, IDX_TYPE n, IDX_TYPE const * const iA, IDX_TYPE const * const jA, - FP_TYPE const * const A, FP_TYPE const * const x, FP_TYPE* y) + FP_TYPE const * const A, FP_TYPE const * const x, FP_TYPE* y) { - for (IDX_TYPE i(0); i < n; i++) { - const IDX_TYPE end(iA[i + 1]); - y[i] = A[iA[i]] * x[jA[iA[i]]]; - for (IDX_TYPE j(iA[i]+1); j < end; j++) { - y[i] += A[j] * x[jA[j]]; - } - y[i] *= a; - } + for (IDX_TYPE i(0); i < n; i++) { + const IDX_TYPE end(iA[i + 1]); + y[i] = A[iA[i]] * x[jA[iA[i]]]; + for (IDX_TYPE j(iA[i]+1); j < end; j++) { + y[i] += A[j] * x[jA[j]]; + } + y[i] *= a; + } } void amuxCRSParallelPThreads (double a, - unsigned n, unsigned const * const iA, unsigned const * const jA, - double const * const A, double const * const x, double* y, - unsigned num_of_pthreads); + unsigned n, unsigned const * const iA, unsigned const * const jA, + double const * const A, double const * const x, double* y, + unsigned num_of_pthreads); void amuxCRSParallelPThreads (double a, - unsigned n, unsigned const * const iA, unsigned const * const jA, - double const * const A, double const * const x, double* y, - unsigned num_of_pthreads, unsigned const*const workload_intervals); + unsigned n, unsigned const * const iA, unsigned const * const jA, + double const * const A, double const * const x, double* y, + unsigned num_of_pthreads, unsigned const*const workload_intervals); #ifdef _OPENMP template<typename FP_TYPE, typename IDX_TYPE> @@ -48,24 +48,24 @@ void amuxCRSParallelOpenMP (FP_TYPE a, unsigned n, IDX_TYPE const * const __restrict__ jA, FP_TYPE const * const A, FP_TYPE const * const __restrict__ x, FP_TYPE* __restrict__ y) { - OPENMP_LOOP_TYPE i; + OPENMP_LOOP_TYPE i; IDX_TYPE j; FP_TYPE t; - { + { #pragma omp parallel for private(i, j, t) #ifdef WIN32 #pragma warning ( push ) #pragma warning ( disable: 4018 ) #endif - for (i = 0; i < n; i++) { - const IDX_TYPE end(iA[i + 1]); - t = A[iA[i]] * x[jA[iA[i]]]; - for (j = iA[i]+1; j < end; j++) { - t += A[j] * x[jA[j]]; - } + for (i = 0; i < n; i++) { + const IDX_TYPE end(iA[i + 1]); + t = A[iA[i]] * x[jA[iA[i]]]; + for (j = iA[i]+1; j < end; j++) { + t += A[j] * x[jA[j]]; + } y[i] = t * a; - } - } + } + } #ifdef WIN32 #pragma warning ( pop ) #endif @@ -73,7 +73,7 @@ void amuxCRSParallelOpenMP (FP_TYPE a, unsigned n, #endif void amuxCRSSym (double a, - unsigned n, unsigned const * const iA, unsigned const * const jA, + unsigned n, unsigned const * const iA, unsigned const * const jA, double const * const A, double const * const x, double* y); } // end namespace MathLib diff --git a/MathLib/LinAlg/Sparse/sparse.h b/MathLib/LinAlg/Sparse/sparse.h index 1a44eec3a8c..839890eca81 100644 --- a/MathLib/LinAlg/Sparse/sparse.h +++ b/MathLib/LinAlg/Sparse/sparse.h @@ -23,41 +23,41 @@ template<class T> void CS_write(std::ostream &os, unsigned n, unsigned const* iA, unsigned const* jA, T const* A) { - os.write(reinterpret_cast<char*>(&n), sizeof(unsigned)); - os.write(reinterpret_cast<char*>(const_cast<unsigned*>(iA)), (n + 1) * sizeof(unsigned)); - os.write(reinterpret_cast<char*>(const_cast<unsigned*>(jA)), iA[n] * sizeof(unsigned)); - os.write(reinterpret_cast<char const*>(A), iA[n] * sizeof(T)); + os.write(reinterpret_cast<char*>(&n), sizeof(unsigned)); + os.write(reinterpret_cast<char*>(const_cast<unsigned*>(iA)), (n + 1) * sizeof(unsigned)); + os.write(reinterpret_cast<char*>(const_cast<unsigned*>(jA)), iA[n] * sizeof(unsigned)); + os.write(reinterpret_cast<char const*>(A), iA[n] * sizeof(T)); } template<class T> void CS_read(std::istream &is, unsigned &n, unsigned* &iA, unsigned* &jA, T* &A) { - is.read(reinterpret_cast<char*>(&n), sizeof(unsigned)); - if (iA != NULL) { - delete[] iA; - delete[] jA; - delete[] A; - } - iA = new unsigned[n + 1]; - assert(iA != NULL); - is.read(reinterpret_cast<char*>(iA), (n + 1) * sizeof(unsigned)); + is.read(reinterpret_cast<char*>(&n), sizeof(unsigned)); + if (iA != NULL) { + delete[] iA; + delete[] jA; + delete[] A; + } + iA = new unsigned[n + 1]; + assert(iA != NULL); + is.read(reinterpret_cast<char*>(iA), (n + 1) * sizeof(unsigned)); - jA = new unsigned[iA[n]]; - assert(jA != NULL); - is.read(reinterpret_cast<char*>(jA), iA[n] * sizeof(unsigned)); + jA = new unsigned[iA[n]]; + assert(jA != NULL); + is.read(reinterpret_cast<char*>(jA), iA[n] * sizeof(unsigned)); - A = new T[iA[n]]; - assert(A != NULL); - is.read(reinterpret_cast<char*>(A), iA[n] * sizeof(T)); + A = new T[iA[n]]; + assert(A != NULL); + is.read(reinterpret_cast<char*>(A), iA[n] * sizeof(T)); #ifndef NDEBUG - // do simple checks - if (iA[0] != 0) std::cerr << "\nCRS matrix: array iA doesn't start with 0\n"; + // do simple checks + if (iA[0] != 0) std::cerr << "\nCRS matrix: array iA doesn't start with 0\n"; - unsigned i = 0; - while (i < iA[n] && jA[i] < n) - ++i; - if (i < iA[n]) std::cerr << "\nCRS matrix: the " << i - << "th entry of jA has the value " << jA[i] << ", which is out of bounds.\n"; + unsigned i = 0; + while (i < iA[n] && jA[i] < n) + ++i; + if (i < iA[n]) std::cerr << "\nCRS matrix: the " << i + << "th entry of jA has the value " << jA[i] << ", which is out of bounds.\n"; #endif } diff --git a/MathLib/LinearFunction.h b/MathLib/LinearFunction.h index 4e902f3e39e..776838e7ddc 100644 --- a/MathLib/LinearFunction.h +++ b/MathLib/LinearFunction.h @@ -22,7 +22,7 @@ namespace MathLib /** * Linear function * \f[ - * f(x_1,...,x_k)=a_0+a_1*x_1+...+a_k*x_k + * f(x_1,...,x_k)=a_0+a_1*x_1+...+a_k*x_k * \f] * * \tparam T_TYPE value type @@ -32,27 +32,27 @@ template <typename T_TYPE, unsigned N_VARS> class LinearFunction { public: - /** - * Constructor - * \param coefficients an array of coefficients of a linear function. - * The size of the coefficient array should equal to the number of variables + 1 - */ - explicit LinearFunction(const std::array<T_TYPE, N_VARS+1> &coefficients) - : _coefficients(coefficients) - {} - - /** - * evaluate the function - * \param x an array of variables. the size of the array should equal to the number of variables - */ - T_TYPE operator()(T_TYPE const * const x) const - { - return std::inner_product(_coefficients.cbegin()+1, _coefficients.cend(), x, _coefficients.front()); - } + /** + * Constructor + * \param coefficients an array of coefficients of a linear function. + * The size of the coefficient array should equal to the number of variables + 1 + */ + explicit LinearFunction(const std::array<T_TYPE, N_VARS+1> &coefficients) + : _coefficients(coefficients) + {} + + /** + * evaluate the function + * \param x an array of variables. the size of the array should equal to the number of variables + */ + T_TYPE operator()(T_TYPE const * const x) const + { + return std::inner_product(_coefficients.cbegin()+1, _coefficients.cend(), x, _coefficients.front()); + } private: - /// Coefficients of a linear function - const std::array<T_TYPE, N_VARS+1> _coefficients; + /// Coefficients of a linear function + const std::array<T_TYPE, N_VARS+1> _coefficients; }; } // MathLib diff --git a/MathLib/MathTools.cpp b/MathLib/MathTools.cpp index ce7e72dd1fb..6c7713e60ed 100644 --- a/MathLib/MathTools.cpp +++ b/MathLib/MathTools.cpp @@ -20,37 +20,37 @@ namespace MathLib { void crossProd(const double u[3], const double v[3], double r[3]) { - r[0] = u[1] * v[2] - u[2] * v[1]; - r[1] = u[2] * v[0] - u[0] * v[2]; - r[2] = u[0] * v[1] - u[1] * v[0]; + r[0] = u[1] * v[2] - u[2] * v[1]; + r[1] = u[2] * v[0] - u[0] * v[2]; + r[2] = u[0] * v[1] - u[1] * v[0]; } double calcProjPntToLineAndDists(const double p[3], const double a[3], - const double b[3], double &lambda, double &d0) + const double b[3], double &lambda, double &d0) { - // g (lambda) = a + lambda v, v = b-a - double v[3] = {b[0] - a[0], b[1] - a[1], b[2] - a[2]}; - // orthogonal projection: (g(lambda)-p) * v = 0 => in order to compute lambda we define a help vector u - double u[3] = {p[0] - a[0], p[1] - a[1], p[2] - a[2]}; - lambda = scalarProduct<double,3> (u, v) / scalarProduct<double,3> (v, v); + // g (lambda) = a + lambda v, v = b-a + double v[3] = {b[0] - a[0], b[1] - a[1], b[2] - a[2]}; + // orthogonal projection: (g(lambda)-p) * v = 0 => in order to compute lambda we define a help vector u + double u[3] = {p[0] - a[0], p[1] - a[1], p[2] - a[2]}; + lambda = scalarProduct<double,3> (u, v) / scalarProduct<double,3> (v, v); - // compute projected point - double proj_pnt[3]; - for (std::size_t k(0); k < 3; k++) - proj_pnt[k] = a[k] + lambda * v[k]; + // compute projected point + double proj_pnt[3]; + for (std::size_t k(0); k < 3; k++) + proj_pnt[k] = a[k] + lambda * v[k]; - d0 = std::sqrt (sqrDist (proj_pnt, a)); + d0 = std::sqrt (sqrDist (proj_pnt, a)); - return std::sqrt (sqrDist (p, proj_pnt)); + return std::sqrt (sqrDist (p, proj_pnt)); } double getAngle (const double p0[3], const double p1[3], const double p2[3]) { - const double v0[3] = {p0[0]-p1[0], p0[1]-p1[1], p0[2]-p1[2]}; - const double v1[3] = {p2[0]-p1[0], p2[1]-p1[1], p2[2]-p1[2]}; + const double v0[3] = {p0[0]-p1[0], p0[1]-p1[1], p0[2]-p1[2]}; + const double v1[3] = {p2[0]-p1[0], p2[1]-p1[1], p2[2]-p1[2]}; - // apply Cauchy Schwarz inequality - return std::acos (scalarProduct<double,3> (v0,v1) / (std::sqrt(scalarProduct<double,3>(v0,v0)) * sqrt(scalarProduct<double,3>(v1,v1)))); + // apply Cauchy Schwarz inequality + return std::acos (scalarProduct<double,3> (v0,v1) / (std::sqrt(scalarProduct<double,3>(v0,v0)) * sqrt(scalarProduct<double,3>(v1,v1)))); } diff --git a/MathLib/MathTools.h b/MathLib/MathTools.h index 23cb4166ece..4d6b34f993f 100644 --- a/MathLib/MathTools.h +++ b/MathLib/MathTools.h @@ -31,53 +31,53 @@ namespace MathLib template<typename T, int N> inline T scalarProduct(T const * const v0, T const * const v1) { - T res (v0[0] * v1[0]); + T res (v0[0] * v1[0]); #ifdef _OPENMP - OPENMP_LOOP_TYPE k; + OPENMP_LOOP_TYPE k; #pragma omp parallel for reduction (+:res) - for (k = 1; k<N; k++) { - res += v0[k] * v1[k]; - } + for (k = 1; k<N; k++) { + res += v0[k] * v1[k]; + } #else - for (std::size_t k(1); k < N; k++) - res += v0[k] * v1[k]; + for (std::size_t k(1); k < N; k++) + res += v0[k] * v1[k]; #endif - return res; + return res; } template <> inline double scalarProduct<double,3>(double const * const v0, double const * const v1) { - double res (v0[0] * v1[0]); - for (std::size_t k(1); k < 3; k++) - res += v0[k] * v1[k]; - return res; + double res (v0[0] * v1[0]); + for (std::size_t k(1); k < 3; k++) + res += v0[k] * v1[k]; + return res; } template<typename T> inline T scalarProduct(T const * const v0, T const * const v1, unsigned n) { - T res (v0[0] * v1[0]); + T res (v0[0] * v1[0]); #ifdef _OPENMP - OPENMP_LOOP_TYPE k; + OPENMP_LOOP_TYPE k; #pragma omp parallel for reduction (+:res) #ifdef WIN32 #pragma warning ( push ) #pragma warning ( disable: 4018 ) #endif - for (k = 1; k<n; k++) { - res += v0[k] * v1[k]; - } + for (k = 1; k<n; k++) { + res += v0[k] * v1[k]; + } #ifdef WIN32 #pragma warning ( pop ) #endif #else - for (std::size_t k(1); k < n; k++) - res += v0[k] * v1[k]; + for (std::size_t k(1); k < n; k++) + res += v0[k] * v1[k]; #endif - return res; + return res; } /** @@ -107,8 +107,8 @@ double calcProjPntToLineAndDists(const double p[3], const double a[3], inline double sqrDist(const double* p0, const double* p1) { - const double v[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; - return scalarProduct<double,3>(v,v); + const double v[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; + return scalarProduct<double,3>(v,v); } /** diff --git a/MathLib/ODE/CVodeSolver.cpp b/MathLib/ODE/CVodeSolver.cpp index 5132cd5c6e1..23279fe46f1 100644 --- a/MathLib/ODE/CVodeSolver.cpp +++ b/MathLib/ODE/CVodeSolver.cpp @@ -32,39 +32,39 @@ */ void check_error(std::string const& f_name, int const error_flag) { - if (error_flag != CV_SUCCESS) - { - ERR("CVodeSolver: %s failed with error flag %d.", f_name.c_str(), - error_flag); - std::abort(); - } + if (error_flag != CV_SUCCESS) + { + ERR("CVodeSolver: %s failed with error flag %d.", f_name.c_str(), + error_flag); + std::abort(); + } } //! Prints some statistics about an ODE solver run. void printStats(void* cvode_mem) { - long int nst = 0, nfe = 0, nsetups = 0, nje = 0, nfeLS = 0, nni = 0, - ncfn = 0, netf = 0, nge = 0; - - check_error("CVodeGetNumSteps", CVodeGetNumSteps(cvode_mem, &nst)); - check_error("CVodeGetNumRhsEvals", CVodeGetNumRhsEvals(cvode_mem, &nfe)); - check_error("CVodeGetNumLinSolvSetups", - CVodeGetNumLinSolvSetups(cvode_mem, &nsetups)); - check_error("CVodeGetNumErrTestFails", - CVodeGetNumErrTestFails(cvode_mem, &netf)); - check_error("CVodeGetNumNonlinSolvIters", - CVodeGetNumNonlinSolvIters(cvode_mem, &nni)); - check_error("CVodeGetNumNonlinSolvConvFails", - CVodeGetNumNonlinSolvConvFails(cvode_mem, &ncfn)); - check_error("CVDlsGetNumJacEvals", CVDlsGetNumJacEvals(cvode_mem, &nje)); - check_error("CVDlsGetNumRhsEvals", CVDlsGetNumRhsEvals(cvode_mem, &nfeLS)); - check_error("CVodeGetNumGEvals", CVodeGetNumGEvals(cvode_mem, &nge)); - - DBUG("Sundials CVode solver. Statistics:"); - DBUG("nst = %-6ld nfe = %-6ld nsetups = %-6ld nfeLS = %-6ld nje = %ld", - nst, nfe, nsetups, nfeLS, nje); - DBUG("nni = %-6ld ncfn = %-6ld netf = %-6ld nge = %ld\n", nni, ncfn, - netf, nge); + long int nst = 0, nfe = 0, nsetups = 0, nje = 0, nfeLS = 0, nni = 0, + ncfn = 0, netf = 0, nge = 0; + + check_error("CVodeGetNumSteps", CVodeGetNumSteps(cvode_mem, &nst)); + check_error("CVodeGetNumRhsEvals", CVodeGetNumRhsEvals(cvode_mem, &nfe)); + check_error("CVodeGetNumLinSolvSetups", + CVodeGetNumLinSolvSetups(cvode_mem, &nsetups)); + check_error("CVodeGetNumErrTestFails", + CVodeGetNumErrTestFails(cvode_mem, &netf)); + check_error("CVodeGetNumNonlinSolvIters", + CVodeGetNumNonlinSolvIters(cvode_mem, &nni)); + check_error("CVodeGetNumNonlinSolvConvFails", + CVodeGetNumNonlinSolvConvFails(cvode_mem, &ncfn)); + check_error("CVDlsGetNumJacEvals", CVDlsGetNumJacEvals(cvode_mem, &nje)); + check_error("CVDlsGetNumRhsEvals", CVDlsGetNumRhsEvals(cvode_mem, &nfeLS)); + check_error("CVodeGetNumGEvals", CVodeGetNumGEvals(cvode_mem, &nge)); + + DBUG("Sundials CVode solver. Statistics:"); + DBUG("nst = %-6ld nfe = %-6ld nsetups = %-6ld nfeLS = %-6ld nje = %ld", + nst, nfe, nsetups, nfeLS, nje); + DBUG("nni = %-6ld ncfn = %-6ld netf = %-6ld nge = %ld\n", nni, ncfn, + netf, nge); } //! @} @@ -84,47 +84,47 @@ namespace ODE */ class CVodeSolverImpl final { - static_assert(std::is_same<realtype, double>::value, - "CVode's realtype is not the same as double"); + static_assert(std::is_same<realtype, double>::value, + "CVode's realtype is not the same as double"); public: - CVodeSolverImpl(BaseLib::ConfigTree const& config, - unsigned const num_equations); + CVodeSolverImpl(BaseLib::ConfigTree const& config, + unsigned const num_equations); - void setFunction(std::unique_ptr<detail::FunctionHandles>&& f); + void setFunction(std::unique_ptr<detail::FunctionHandles>&& f); - void preSolve(); - bool solve(const double t_end); + void preSolve(); + bool solve(const double t_end); - double const* getSolution() const { return NV_DATA_S(_y); } - double getTime() const { return _t; } - void getYDot(const double t, double const* const y, double* const y_dot); - void setTolerance(const double* abstol, const double reltol); - void setTolerance(const double abstol, const double reltol); - void setIC(const double t0, double const* const y0); + double const* getSolution() const { return NV_DATA_S(_y); } + double getTime() const { return _t; } + void getYDot(const double t, double const* const y, double* const y_dot); + void setTolerance(const double* abstol, const double reltol); + void setTolerance(const double abstol, const double reltol); + void setIC(const double t0, double const* const y0); - ~CVodeSolverImpl(); + ~CVodeSolverImpl(); private: - N_Vector _y = nullptr; //!< The solution vector. + N_Vector _y = nullptr; //!< The solution vector. - realtype _t; //! current time + realtype _t; //! current time - N_Vector _abstol = nullptr; //!< Array of absolute tolerances. - realtype _reltol; //!< Relative tolerance + N_Vector _abstol = nullptr; //!< Array of absolute tolerances. + realtype _reltol; //!< Relative tolerance - unsigned _num_equations; //!< Number of equations in the ODE system. - void* _cvode_mem; //!< CVode's internal memory + unsigned _num_equations; //!< Number of equations in the ODE system. + void* _cvode_mem; //!< CVode's internal memory - //! Function handles that compute \f$\partial \dot y/\partial y\f$ - //! and \f$\dot y\f$. - std::unique_ptr<detail::FunctionHandles> _f; + //! Function handles that compute \f$\partial \dot y/\partial y\f$ + //! and \f$\dot y\f$. + std::unique_ptr<detail::FunctionHandles> _f; - //! The multistep method used for solving the ODE. - int _linear_multistep_method = CV_ADAMS; + //! The multistep method used for solving the ODE. + int _linear_multistep_method = CV_ADAMS; - //! Either solve via fixed-point iteration or via Newton-Raphson method. - int _nonlinear_solver_iteration = CV_FUNCTIONAL; + //! Either solve via fixed-point iteration or via Newton-Raphson method. + int _nonlinear_solver_iteration = CV_FUNCTIONAL; }; //! @} @@ -132,179 +132,179 @@ private: CVodeSolverImpl::CVodeSolverImpl(const BaseLib::ConfigTree& config, const unsigned num_equations) { - if (auto const param = - config.getConfParamOptional<std::string>("linear_multistep_method")) - { - DBUG("setting linear multistep method (config: %s)", param->c_str()); - - if (*param == "Adams") - { - _linear_multistep_method = CV_ADAMS; - } - else if (*param == "BDF") - { - _linear_multistep_method = CV_BDF; - } - else - { - ERR("unknown linear multistep method: %s", param->c_str()); - std::abort(); - } - } - - if (auto const param = config.getConfParamOptional<std::string>( - "nonlinear_solver_iteration")) - { - DBUG("setting nonlinear solver iteration (config: %s)", param->c_str()); - - if (*param == "Functional") - { - _nonlinear_solver_iteration = CV_FUNCTIONAL; - } - else if (*param == "Newton") - { - _nonlinear_solver_iteration = CV_NEWTON; - } - else - { - ERR("unknown nonlinear solver iteration: %s", param->c_str()); - std::abort(); - } - } - - _y = N_VNew_Serial(num_equations); - _abstol = N_VNew_Serial(num_equations); - _num_equations = num_equations; - - _cvode_mem = - CVodeCreate(_linear_multistep_method, _nonlinear_solver_iteration); - - if (_cvode_mem == nullptr || _y == nullptr || _abstol == nullptr) - { - ERR("couldn't allocate storage for CVode solver."); - std::abort(); - } - - auto f_wrapped = [](const realtype t, const N_Vector y, N_Vector ydot, - void* function_handles) -> int - { - bool successful = - static_cast<detail::FunctionHandles*>(function_handles) - ->call(t, NV_DATA_S(y), NV_DATA_S(ydot)); - return successful ? 0 : 1; - }; - - check_error("CVodeInit", CVodeInit(_cvode_mem, f_wrapped, 0.0, _y)); + if (auto const param = + config.getConfParamOptional<std::string>("linear_multistep_method")) + { + DBUG("setting linear multistep method (config: %s)", param->c_str()); + + if (*param == "Adams") + { + _linear_multistep_method = CV_ADAMS; + } + else if (*param == "BDF") + { + _linear_multistep_method = CV_BDF; + } + else + { + ERR("unknown linear multistep method: %s", param->c_str()); + std::abort(); + } + } + + if (auto const param = config.getConfParamOptional<std::string>( + "nonlinear_solver_iteration")) + { + DBUG("setting nonlinear solver iteration (config: %s)", param->c_str()); + + if (*param == "Functional") + { + _nonlinear_solver_iteration = CV_FUNCTIONAL; + } + else if (*param == "Newton") + { + _nonlinear_solver_iteration = CV_NEWTON; + } + else + { + ERR("unknown nonlinear solver iteration: %s", param->c_str()); + std::abort(); + } + } + + _y = N_VNew_Serial(num_equations); + _abstol = N_VNew_Serial(num_equations); + _num_equations = num_equations; + + _cvode_mem = + CVodeCreate(_linear_multistep_method, _nonlinear_solver_iteration); + + if (_cvode_mem == nullptr || _y == nullptr || _abstol == nullptr) + { + ERR("couldn't allocate storage for CVode solver."); + std::abort(); + } + + auto f_wrapped = [](const realtype t, const N_Vector y, N_Vector ydot, + void* function_handles) -> int + { + bool successful = + static_cast<detail::FunctionHandles*>(function_handles) + ->call(t, NV_DATA_S(y), NV_DATA_S(ydot)); + return successful ? 0 : 1; + }; + + check_error("CVodeInit", CVodeInit(_cvode_mem, f_wrapped, 0.0, _y)); } void CVodeSolverImpl::setTolerance(const double* abstol, const double reltol) { - for (unsigned i = 0; i < _num_equations; ++i) - { - NV_Ith_S(_abstol, i) = abstol[i]; - } + for (unsigned i = 0; i < _num_equations; ++i) + { + NV_Ith_S(_abstol, i) = abstol[i]; + } - _reltol = reltol; + _reltol = reltol; } void CVodeSolverImpl::setTolerance(const double abstol, const double reltol) { - for (unsigned i = 0; i < _num_equations; ++i) - { - NV_Ith_S(_abstol, i) = abstol; - } + for (unsigned i = 0; i < _num_equations; ++i) + { + NV_Ith_S(_abstol, i) = abstol; + } - _reltol = reltol; + _reltol = reltol; } void CVodeSolverImpl::setFunction(std::unique_ptr<detail::FunctionHandles>&& f) { - _f = std::move(f); - assert(_num_equations == _f->getNumEquations()); + _f = std::move(f); + assert(_num_equations == _f->getNumEquations()); } void CVodeSolverImpl::setIC(const double t0, double const* const y0) { - for (unsigned i = 0; i < _num_equations; ++i) - { - NV_Ith_S(_y, i) = y0[i]; - } + for (unsigned i = 0; i < _num_equations; ++i) + { + NV_Ith_S(_y, i) = y0[i]; + } - _t = t0; + _t = t0; } void CVodeSolverImpl::preSolve() { - assert(_f != nullptr && "ode function handle was not provided"); - - // sets initial conditions - check_error("CVodeReInit", CVodeReInit(_cvode_mem, _t, _y)); - - check_error("CVodeSetUserData", - CVodeSetUserData(_cvode_mem, static_cast<void*>(_f.get()))); - - /* Call CVodeSVtolerances to specify the scalar relative tolerance - * and vector absolute tolerances */ - check_error("CVodeSVtolerances", - CVodeSVtolerances(_cvode_mem, _reltol, _abstol)); - - /* Call CVDense to specify the CVDENSE dense linear solver */ - check_error("CVDense", CVDense(_cvode_mem, _num_equations)); - - if (_f->hasJacobian()) - { - auto df_wrapped = [](const long N, const realtype t, const N_Vector y, - const N_Vector ydot, const DlsMat jac, - void* function_handles, N_Vector /*tmp1*/, - N_Vector /*tmp2*/, N_Vector /*tmp3*/ - ) -> int - { - (void)N; // prevent warnings during non-debug build - auto* fh = static_cast<detail::FunctionHandles*>(function_handles); - assert(N == fh->getNumEquations()); - - // Caution: by calling the DENSE_COL() macro we assume that matrices - // are stored contiguously in memory! - // See also the header files sundials_direct.h and cvode_direct.h in - // the Sundials source code. The comments about the macro DENSE_COL - // in those files indicate that matrices are stored column-wise. - bool successful = fh->callJacobian(t, NV_DATA_S(y), NV_DATA_S(ydot), - DENSE_COL(jac, 0)); - return successful ? 0 : 1; - }; - - check_error("CVDlsSetDenseJacFn", - CVDlsSetDenseJacFn(_cvode_mem, df_wrapped)); - } + assert(_f != nullptr && "ode function handle was not provided"); + + // sets initial conditions + check_error("CVodeReInit", CVodeReInit(_cvode_mem, _t, _y)); + + check_error("CVodeSetUserData", + CVodeSetUserData(_cvode_mem, static_cast<void*>(_f.get()))); + + /* Call CVodeSVtolerances to specify the scalar relative tolerance + * and vector absolute tolerances */ + check_error("CVodeSVtolerances", + CVodeSVtolerances(_cvode_mem, _reltol, _abstol)); + + /* Call CVDense to specify the CVDENSE dense linear solver */ + check_error("CVDense", CVDense(_cvode_mem, _num_equations)); + + if (_f->hasJacobian()) + { + auto df_wrapped = [](const long N, const realtype t, const N_Vector y, + const N_Vector ydot, const DlsMat jac, + void* function_handles, N_Vector /*tmp1*/, + N_Vector /*tmp2*/, N_Vector /*tmp3*/ + ) -> int + { + (void)N; // prevent warnings during non-debug build + auto* fh = static_cast<detail::FunctionHandles*>(function_handles); + assert(N == fh->getNumEquations()); + + // Caution: by calling the DENSE_COL() macro we assume that matrices + // are stored contiguously in memory! + // See also the header files sundials_direct.h and cvode_direct.h in + // the Sundials source code. The comments about the macro DENSE_COL + // in those files indicate that matrices are stored column-wise. + bool successful = fh->callJacobian(t, NV_DATA_S(y), NV_DATA_S(ydot), + DENSE_COL(jac, 0)); + return successful ? 0 : 1; + }; + + check_error("CVDlsSetDenseJacFn", + CVDlsSetDenseJacFn(_cvode_mem, df_wrapped)); + } } bool CVodeSolverImpl::solve(const double t_end) { - realtype t_reached; - check_error("CVode solve", - CVode(_cvode_mem, t_end, _y, &t_reached, CV_NORMAL)); - _t = t_reached; - - // check_error asserts that t_end == t_reached and that solving the ODE - // went fine. Otherwise the program will be aborted. Therefore, we don't - // have to check manually for errors here and can always savely return true. - return true; + realtype t_reached; + check_error("CVode solve", + CVode(_cvode_mem, t_end, _y, &t_reached, CV_NORMAL)); + _t = t_reached; + + // check_error asserts that t_end == t_reached and that solving the ODE + // went fine. Otherwise the program will be aborted. Therefore, we don't + // have to check manually for errors here and can always savely return true. + return true; } void CVodeSolverImpl::getYDot(const double t, double const* const y, double* const y_dot) { - assert(_f != nullptr); - _f->call(t, y, y_dot); + assert(_f != nullptr); + _f->call(t, y, y_dot); } CVodeSolverImpl::~CVodeSolverImpl() { - printStats(_cvode_mem); + printStats(_cvode_mem); - N_VDestroy_Serial(_y); - N_VDestroy_Serial(_abstol); - CVodeFree(&_cvode_mem); + N_VDestroy_Serial(_y); + N_VDestroy_Serial(_abstol); + CVodeFree(&_cvode_mem); } CVodeSolver::CVodeSolver(BaseLib::ConfigTree const& config, @@ -315,48 +315,48 @@ CVodeSolver::CVodeSolver(BaseLib::ConfigTree const& config, void CVodeSolver::setTolerance(const double* abstol, const double reltol) { - _impl->setTolerance(abstol, reltol); + _impl->setTolerance(abstol, reltol); } void CVodeSolver::setTolerance(const double abstol, const double reltol) { - _impl->setTolerance(abstol, reltol); + _impl->setTolerance(abstol, reltol); } void CVodeSolver::setFunction(std::unique_ptr<detail::FunctionHandles>&& f) { - _impl->setFunction(std::move(f)); + _impl->setFunction(std::move(f)); } void CVodeSolver::setIC(const double t0, double const* const y0) { - _impl->setIC(t0, y0); + _impl->setIC(t0, y0); } void CVodeSolver::preSolve() { - _impl->preSolve(); + _impl->preSolve(); } bool CVodeSolver::solve(const double t_end) { - return _impl->solve(t_end); + return _impl->solve(t_end); } double const* CVodeSolver::getSolution() const { - return _impl->getSolution(); + return _impl->getSolution(); } void CVodeSolver::getYDot(const double t, double const* const y, double* const y_dot) const { - _impl->getYDot(t, y, y_dot); + _impl->getYDot(t, y, y_dot); } double CVodeSolver::getTime() const { - return _impl->getTime(); + return _impl->getTime(); } CVodeSolver::~CVodeSolver() = default; diff --git a/MathLib/ODE/CVodeSolver.h b/MathLib/ODE/CVodeSolver.h index c6f77b74500..08f42f69cb5 100644 --- a/MathLib/ODE/CVodeSolver.h +++ b/MathLib/ODE/CVodeSolver.h @@ -45,32 +45,32 @@ class CVodeSolverImpl; class CVodeSolver { protected: - //! Construct from the given \c config with storage allocated for the given - //! \c num_equations. - CVodeSolver(BaseLib::ConfigTree const& config, - unsigned const num_equations); + //! Construct from the given \c config with storage allocated for the given + //! \c num_equations. + CVodeSolver(BaseLib::ConfigTree const& config, + unsigned const num_equations); - void setTolerance(double const* const abstol, const double reltol); - void setTolerance(const double abstol, const double reltol); + void setTolerance(double const* const abstol, const double reltol); + void setTolerance(const double abstol, const double reltol); - void setFunction(std::unique_ptr<detail::FunctionHandles>&& f); + void setFunction(std::unique_ptr<detail::FunctionHandles>&& f); - void setIC(const double t0, double const* const y0); + void setIC(const double t0, double const* const y0); - void preSolve(); - bool solve(const double t_end); + void preSolve(); + bool solve(const double t_end); - double const* getSolution() const; - double getTime() const; - void getYDot(const double t, - double const* const y, - double* const y_dot) const; + double const* getSolution() const; + double getTime() const; + void getYDot(const double t, + double const* const y, + double* const y_dot) const; - ~CVodeSolver(); + ~CVodeSolver(); private: - //! pimpl idiom. - std::unique_ptr<CVodeSolverImpl> _impl; + //! pimpl idiom. + std::unique_ptr<CVodeSolverImpl> _impl; }; //! @}} diff --git a/MathLib/ODE/ConcreteODESolver.h b/MathLib/ODE/ConcreteODESolver.h index 169c02d9cba..c0acb19923c 100644 --- a/MathLib/ODE/ConcreteODESolver.h +++ b/MathLib/ODE/ConcreteODESolver.h @@ -56,63 +56,63 @@ class ConcreteODESolver final : public ODESolver<NumEquations>, private Implementation { public: - void setFunction(Function<NumEquations> f, - JacobianFunction<NumEquations> df) override - { - Implementation::setFunction( - std::unique_ptr<detail::FunctionHandlesImpl<NumEquations>>{ - new detail::FunctionHandlesImpl<NumEquations>{f, df}}); - } - - void setTolerance(const std::array<double, NumEquations>& abstol, - const double reltol) override - { - Implementation::setTolerance(abstol.data(), reltol); - } - - void setTolerance(const double abstol, const double reltol) override - { - Implementation::setTolerance(abstol, reltol); - } - - void setIC(const double t0, - std::initializer_list<double> const& y0) override - { - assert(y0.size() == NumEquations); - Implementation::setIC(t0, y0.begin()); - } - - void setIC(const double t0, - Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> const& - y0) override - { - Implementation::setIC(t0, y0.data()); - } - - void preSolve() override { Implementation::preSolve(); } - bool solve(const double t) override { return Implementation::solve(t); } - MappedConstVector<NumEquations> getSolution() const override - { - return MappedConstVector<NumEquations>{Implementation::getSolution()}; - } - double getTime() const override { return Implementation::getTime(); } - Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> getYDot( - const double t, const MappedConstVector<NumEquations>& y) const override - { - Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> y_dot; - Implementation::getYDot(t, y.data(), y_dot.data()); - return y_dot; - } + void setFunction(Function<NumEquations> f, + JacobianFunction<NumEquations> df) override + { + Implementation::setFunction( + std::unique_ptr<detail::FunctionHandlesImpl<NumEquations>>{ + new detail::FunctionHandlesImpl<NumEquations>{f, df}}); + } + + void setTolerance(const std::array<double, NumEquations>& abstol, + const double reltol) override + { + Implementation::setTolerance(abstol.data(), reltol); + } + + void setTolerance(const double abstol, const double reltol) override + { + Implementation::setTolerance(abstol, reltol); + } + + void setIC(const double t0, + std::initializer_list<double> const& y0) override + { + assert(y0.size() == NumEquations); + Implementation::setIC(t0, y0.begin()); + } + + void setIC(const double t0, + Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> const& + y0) override + { + Implementation::setIC(t0, y0.data()); + } + + void preSolve() override { Implementation::preSolve(); } + bool solve(const double t) override { return Implementation::solve(t); } + MappedConstVector<NumEquations> getSolution() const override + { + return MappedConstVector<NumEquations>{Implementation::getSolution()}; + } + double getTime() const override { return Implementation::getTime(); } + Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> getYDot( + const double t, const MappedConstVector<NumEquations>& y) const override + { + Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> y_dot; + Implementation::getYDot(t, y.data(), y_dot.data()); + return y_dot; + } private: - //! Instances of this class shall only be constructed by createODESolver(). - ConcreteODESolver(BaseLib::ConfigTree const& config) - : Implementation{config, NumEquations} - { - } - - friend std::unique_ptr<ODESolver<NumEquations>> - createODESolver<NumEquations>(BaseLib::ConfigTree const& config); + //! Instances of this class shall only be constructed by createODESolver(). + ConcreteODESolver(BaseLib::ConfigTree const& config) + : Implementation{config, NumEquations} + { + } + + friend std::unique_ptr<ODESolver<NumEquations>> + createODESolver<NumEquations>(BaseLib::ConfigTree const& config); }; //! @} diff --git a/MathLib/ODE/FunctionHandles.h b/MathLib/ODE/FunctionHandles.h index 2e1bf60bd65..af1b38d42f0 100644 --- a/MathLib/ODE/FunctionHandles.h +++ b/MathLib/ODE/FunctionHandles.h @@ -31,79 +31,79 @@ namespace detail class FunctionHandles { public: - //! Calls a function computing \f$\dot y\f$. - //! \returns true or false indicating whether the function succeeded. - virtual bool call(const double t, double const* const y, - double* const ydot) = 0; + //! Calls a function computing \f$\dot y\f$. + //! \returns true or false indicating whether the function succeeded. + virtual bool call(const double t, double const* const y, + double* const ydot) = 0; - //! Calls a function computing \f$\mathtt{jac} := \partial \dot y/\partial - //! y\f$. - //! \returns true or false indicating whether the function succeeded. - virtual bool callJacobian(const double t, - double const* const y, - double* const ydot, - double* const jac) = 0; + //! Calls a function computing \f$\mathtt{jac} := \partial \dot y/\partial + //! y\f$. + //! \returns true or false indicating whether the function succeeded. + virtual bool callJacobian(const double t, + double const* const y, + double* const ydot, + double* const jac) = 0; - //! Tells whether a Jacobian function has been set. - virtual bool hasJacobian() const = 0; + //! Tells whether a Jacobian function has been set. + virtual bool hasJacobian() const = 0; - //! Returns the number of equations in the ODE system. - virtual unsigned getNumEquations() const = 0; + //! Returns the number of equations in the ODE system. + virtual unsigned getNumEquations() const = 0; - virtual ~FunctionHandles() = default; + virtual ~FunctionHandles() = default; }; //! Function handles for an ODE system of \c N equations. template <unsigned N> struct FunctionHandlesImpl final : public FunctionHandles { - FunctionHandlesImpl(Function<N>& f, JacobianFunction<N>& df) : f(f), df(df) - { - } + FunctionHandlesImpl(Function<N>& f, JacobianFunction<N>& df) : f(f), df(df) + { + } - /*! Calls the stored function \c f computing \f$\dot y\f$. - * - * The raw pointers passed to this method are wrapped in some Eigen::Map - * objects before being passed to \c f. Thereby the information about the - * size of the vectors is restored. No memory is copied for that. - * - * \returns true or false indicating whether the function succeeded. - */ - bool call(const double t, const double* const y, - double* const ydot) override - { - if (f) - { - MappedVector<N> ydot_mapped{ydot}; - return f(t, MappedConstVector<N>{y}, ydot_mapped); - } - return false; - } + /*! Calls the stored function \c f computing \f$\dot y\f$. + * + * The raw pointers passed to this method are wrapped in some Eigen::Map + * objects before being passed to \c f. Thereby the information about the + * size of the vectors is restored. No memory is copied for that. + * + * \returns true or false indicating whether the function succeeded. + */ + bool call(const double t, const double* const y, + double* const ydot) override + { + if (f) + { + MappedVector<N> ydot_mapped{ydot}; + return f(t, MappedConstVector<N>{y}, ydot_mapped); + } + return false; + } - /*! Calls the stored function computing - * \f$\mathtt{jac} := \partial \dot y/\partial y\f$. - * - * \returns true or false indicating whether the function succeeded. - * \see call() - */ - bool callJacobian(const double t, const double* const y, double* const ydot, - double* const jac) override - { - if (df) - { - MappedMatrix<N, N> jac_mapped{jac}; - return df(t, - MappedConstVector<N>{y}, - MappedConstVector<N>{ydot}, - jac_mapped); - } - return false; - } + /*! Calls the stored function computing + * \f$\mathtt{jac} := \partial \dot y/\partial y\f$. + * + * \returns true or false indicating whether the function succeeded. + * \see call() + */ + bool callJacobian(const double t, const double* const y, double* const ydot, + double* const jac) override + { + if (df) + { + MappedMatrix<N, N> jac_mapped{jac}; + return df(t, + MappedConstVector<N>{y}, + MappedConstVector<N>{ydot}, + jac_mapped); + } + return false; + } - bool hasJacobian() const override { return df != nullptr; } - unsigned getNumEquations() const override { return N; } - Function<N> f; - JacobianFunction<N> df; + bool hasJacobian() const override { return df != nullptr; } + unsigned getNumEquations() const override { return N; } + Function<N> f; + JacobianFunction<N> df; }; //! @} diff --git a/MathLib/ODE/ODESolver.h b/MathLib/ODE/ODESolver.h index e951a1f6259..e0d1d0832d3 100644 --- a/MathLib/ODE/ODESolver.h +++ b/MathLib/ODE/ODESolver.h @@ -37,99 +37,99 @@ template <unsigned NumEquations> class ODESolver { public: - /*! Sets functions that compute \f$\dot y\f$ and - * the Jacobian \f$\partial \dot y/\partial y\f$. - * - * If no Jacobian function shall be set, \c nullptr can be passed fo \c df. - * - * \remark - * solve() cannot be directly called after this method, rather preSolve() - * has to be called first! - */ - virtual void setFunction(Function<NumEquations> f, - JacobianFunction<NumEquations> df) = 0; - - /*! Sets the tolerances for the ODE solver. - * - * \param abstol absolute tolerance, one value for all equations. - * \param reltol relative tolerance. - * - * \remark - * solve() cannot be directly called after this method, rather preSolve() - * has to be called first! - */ - virtual void setTolerance(const double abstol, const double reltol) = 0; - - /*! Sets the tolerances for the ODE solver. - * - * \param abstol absolute tolerance, one value each equation. - * \param reltol relative tolerance. - * - * \remark - * solve() cannot be directly called after this method, rather preSolve() - * has to be called first! - */ - virtual void setTolerance(const std::array<double, NumEquations>& abstol, - const double reltol) = 0; - - /*! Sets the conditions. - * - * \param t0 initial time. - * \param y0 initial values. - * - * \remark - * solve() cannot be directly called after this method, rather preSolve() - * has to be called first! - */ - virtual void setIC(const double t0, - std::initializer_list<double> const& y0) = 0; - - //! \overload - virtual void setIC( - const double t0, - Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> const& y0) = 0; - - /*! Finishes setting up the ODE solver, makes it ready to solve the provided - * ODE. - * - * This method applies settings to the ODE solver, hence it has to be called - * after calling setters. - * - * \note - * preSolve() has to be called once before calling solve, it is not - * necessary - * to call it after each setter. - */ - virtual void preSolve() = 0; - - /*! Solves the ODE from the set initial condition to time \c t. - * - * \returns true or false indicating whether solving succeeded. - * - * \pre preSolve() has to be called before this method. - */ - virtual bool solve(const double t) = 0; - - //! Returns the number of equations. - virtual unsigned getNumEquations() const { return NumEquations; } - //! Returns the solution vector \c y - virtual MappedConstVector<NumEquations> getSolution() const = 0; - - /*! Returns the time that the solver has reached. - * - * The return value should be equal to the time \c t passed to solve() if - * everything went fine. - */ - virtual double getTime() const = 0; - - /*! Computes \f$ \dot y = f(t,y) \f$. - * - * This method is provided for convenience only. - */ - virtual Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> getYDot( - const double t, const MappedConstVector<NumEquations>& y) const = 0; - - virtual ~ODESolver() = default; + /*! Sets functions that compute \f$\dot y\f$ and + * the Jacobian \f$\partial \dot y/\partial y\f$. + * + * If no Jacobian function shall be set, \c nullptr can be passed fo \c df. + * + * \remark + * solve() cannot be directly called after this method, rather preSolve() + * has to be called first! + */ + virtual void setFunction(Function<NumEquations> f, + JacobianFunction<NumEquations> df) = 0; + + /*! Sets the tolerances for the ODE solver. + * + * \param abstol absolute tolerance, one value for all equations. + * \param reltol relative tolerance. + * + * \remark + * solve() cannot be directly called after this method, rather preSolve() + * has to be called first! + */ + virtual void setTolerance(const double abstol, const double reltol) = 0; + + /*! Sets the tolerances for the ODE solver. + * + * \param abstol absolute tolerance, one value each equation. + * \param reltol relative tolerance. + * + * \remark + * solve() cannot be directly called after this method, rather preSolve() + * has to be called first! + */ + virtual void setTolerance(const std::array<double, NumEquations>& abstol, + const double reltol) = 0; + + /*! Sets the conditions. + * + * \param t0 initial time. + * \param y0 initial values. + * + * \remark + * solve() cannot be directly called after this method, rather preSolve() + * has to be called first! + */ + virtual void setIC(const double t0, + std::initializer_list<double> const& y0) = 0; + + //! \overload + virtual void setIC( + const double t0, + Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> const& y0) = 0; + + /*! Finishes setting up the ODE solver, makes it ready to solve the provided + * ODE. + * + * This method applies settings to the ODE solver, hence it has to be called + * after calling setters. + * + * \note + * preSolve() has to be called once before calling solve, it is not + * necessary + * to call it after each setter. + */ + virtual void preSolve() = 0; + + /*! Solves the ODE from the set initial condition to time \c t. + * + * \returns true or false indicating whether solving succeeded. + * + * \pre preSolve() has to be called before this method. + */ + virtual bool solve(const double t) = 0; + + //! Returns the number of equations. + virtual unsigned getNumEquations() const { return NumEquations; } + //! Returns the solution vector \c y + virtual MappedConstVector<NumEquations> getSolution() const = 0; + + /*! Returns the time that the solver has reached. + * + * The return value should be equal to the time \c t passed to solve() if + * everything went fine. + */ + virtual double getTime() const = 0; + + /*! Computes \f$ \dot y = f(t,y) \f$. + * + * This method is provided for convenience only. + */ + virtual Eigen::Matrix<double, NumEquations, 1, Eigen::ColMajor> getYDot( + const double t, const MappedConstVector<NumEquations>& y) const = 0; + + virtual ~ODESolver() = default; }; //! @} diff --git a/MathLib/ODE/ODESolverBuilder.h b/MathLib/ODE/ODESolverBuilder.h index 88212814c97..48ddb3a769c 100644 --- a/MathLib/ODE/ODESolverBuilder.h +++ b/MathLib/ODE/ODESolverBuilder.h @@ -40,15 +40,15 @@ std::unique_ptr<ODESolver<NumEquations>> createODESolver( BaseLib::ConfigTree const& config) { #ifdef CVODE_FOUND - return std::unique_ptr<ODESolver<NumEquations>>( - new ConcreteODESolver<CVodeSolver, NumEquations>(config)); + return std::unique_ptr<ODESolver<NumEquations>>( + new ConcreteODESolver<CVodeSolver, NumEquations>(config)); #endif - (void)config; // Unused parameter warning if no library is available. + (void)config; // Unused parameter warning if no library is available. - ERR( - "No ODE solver could be created. Maybe it is because you did not build" - " OGS6 with support for any external ODE solver library."); - std::abort(); + ERR( + "No ODE solver could be created. Maybe it is because you did not build" + " OGS6 with support for any external ODE solver library."); + std::abort(); } //! @} diff --git a/MathLib/Point3d.h b/MathLib/Point3d.h index 86537f76d64..5d19c81b771 100644 --- a/MathLib/Point3d.h +++ b/MathLib/Point3d.h @@ -46,8 +46,8 @@ inline MathLib::Point3d operator*(MATRIX const& mat, MathLib::Point3d const& p) inline double sqrDist(MathLib::Point3d const& p0, MathLib::Point3d const& p1) { - const double v[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; - return MathLib::scalarProduct<double,3>(v,v); + const double v[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; + return MathLib::scalarProduct<double,3>(v,v); } /// Computes the squared distance between the orthogonal projection of the two @@ -55,7 +55,7 @@ double sqrDist(MathLib::Point3d const& p0, MathLib::Point3d const& p1) inline double sqrDist2d(MathLib::Point3d const& p0, MathLib::Point3d const& p1) { - return (p0[0]-p1[0])*(p0[0]-p1[0]) + (p0[1]-p1[1])*(p0[1]-p1[1]); + return (p0[0]-p1[0])*(p0[0]-p1[0]) + (p0[1]-p1[1])*(p0[1]-p1[1]); } } // end namespace MathLib diff --git a/MathLib/Point3dWithID.h b/MathLib/Point3dWithID.h index 3de7ee16cee..2bcf36ec2a3 100644 --- a/MathLib/Point3dWithID.h +++ b/MathLib/Point3dWithID.h @@ -26,45 +26,45 @@ namespace MathLib */ class Point3dWithID: public Point3d { public: - /// Constructs a point with the coordinates x0, x1 and x2 and the provided - /// id. - /// @param x0 x coordinate of point - /// @param x1 y coordinate of point - /// @param x2 z coordinate of point - /// @param id the id of the object [default: max of std::size_t] - Point3dWithID(double x0, double x1, double x2, - std::size_t id = std::numeric_limits<std::size_t>::max()) - : Point3d(std::array<double,3>({{x0, x1, x2}})), _id(id) - {} + /// Constructs a point with the coordinates x0, x1 and x2 and the provided + /// id. + /// @param x0 x coordinate of point + /// @param x1 y coordinate of point + /// @param x2 z coordinate of point + /// @param id the id of the object [default: max of std::size_t] + Point3dWithID(double x0, double x1, double x2, + std::size_t id = std::numeric_limits<std::size_t>::max()) + : Point3d(std::array<double,3>({{x0, x1, x2}})), _id(id) + {} - /// Constructs a point using std::array<double,3> as coordinates and - /// the provided id. - /// @param coords coordinates of the point - /// @param id the id of the object [default: max of std::size_t] - Point3dWithID(std::array<double,3> const& coords, - std::size_t id = std::numeric_limits<std::size_t>::max()) - : Point3d(coords), _id(id) - {} + /// Constructs a point using std::array<double,3> as coordinates and + /// the provided id. + /// @param coords coordinates of the point + /// @param id the id of the object [default: max of std::size_t] + Point3dWithID(std::array<double,3> const& coords, + std::size_t id = std::numeric_limits<std::size_t>::max()) + : Point3d(coords), _id(id) + {} - /// Constructs a point with the same coordinates as the given - /// Point3d pnt and the provided id. - /// @param pnt a MathLib::Point3d object containing the coordinates - /// @param id the id of the object [default: max of std::size_t] - explicit Point3dWithID(MathLib::Point3d const& pnt, - std::size_t id = std::numeric_limits<std::size_t>::max()) - : MathLib::Point3d(pnt), _id(id) - {} + /// Constructs a point with the same coordinates as the given + /// Point3d pnt and the provided id. + /// @param pnt a MathLib::Point3d object containing the coordinates + /// @param id the id of the object [default: max of std::size_t] + explicit Point3dWithID(MathLib::Point3d const& pnt, + std::size_t id = std::numeric_limits<std::size_t>::max()) + : MathLib::Point3d(pnt), _id(id) + {} - /// Default constructor that initializes the id with max of std::size_t - /// the default constructor of class Point3d. - Point3dWithID() : - Point3d(), _id(std::numeric_limits<std::size_t>::max()) - {} + /// Default constructor that initializes the id with max of std::size_t + /// the default constructor of class Point3d. + Point3dWithID() : + Point3d(), _id(std::numeric_limits<std::size_t>::max()) + {} - std::size_t getID() const { return _id; } + std::size_t getID() const { return _id; } protected: - std::size_t _id; + std::size_t _id; }; } diff --git a/MathLib/TemplatePoint.h b/MathLib/TemplatePoint.h index aca3ea2bcac..cf0d9965f68 100644 --- a/MathLib/TemplatePoint.h +++ b/MathLib/TemplatePoint.h @@ -34,75 +34,75 @@ namespace MathLib template <typename T, std::size_t DIM = 3> class TemplatePoint { public: - typedef T FP_T; - - /** default constructor with zero coordinates */ - TemplatePoint(); - - /** constructor - constructs a TemplatePoint object - * - * @param x std::array containing the coordinates of the point - */ - explicit TemplatePoint(std::array<T,DIM> const& x); - - /** virtual destructor */ - virtual ~TemplatePoint() = default; - - TemplatePoint(TemplatePoint const&) = default; - TemplatePoint& operator=(TemplatePoint const&) = default; - - /** \brief const access operator - * The access to the point coordinates is like the access to a field. Code example: - * \code - * Point<double> point (1.0, 2.0, 3.0); - * double sqrNrm2 = point[0] * point[0] + point[1] * point[1] + point[2] + point[2]; - * \endcode - */ - const T& operator[] (std::size_t idx) const - { - assert (idx < DIM); - return _x[idx]; - } - /** \brief access operator (see book Effektiv C++ programmieren - subsection 1.3.2 ). - * \sa const T& operator[] (std::size_t idx) const - */ - T& operator[] (std::size_t idx) - { - return const_cast<T&> (static_cast<const TemplatePoint&> (*this)[idx]); - } - - /** returns an array containing the coordinates of the point */ - const T* getCoords () const - { - return _x.data(); - } - - /** write point coordinates into stream (used from operator<<) - * \param os a standard output stream - */ - virtual void write (std::ostream &os) const - { - std::copy(_x.cbegin(), _x.cend(), std::ostream_iterator<T>(os, " ")); - } - - /** read point coordinates into stream (used from operator>>) */ - virtual void read (std::istream &is) - { - std::copy(std::istream_iterator<T>(is), std::istream_iterator<T>(), _x.begin()); - } + typedef T FP_T; + + /** default constructor with zero coordinates */ + TemplatePoint(); + + /** constructor - constructs a TemplatePoint object + * + * @param x std::array containing the coordinates of the point + */ + explicit TemplatePoint(std::array<T,DIM> const& x); + + /** virtual destructor */ + virtual ~TemplatePoint() = default; + + TemplatePoint(TemplatePoint const&) = default; + TemplatePoint& operator=(TemplatePoint const&) = default; + + /** \brief const access operator + * The access to the point coordinates is like the access to a field. Code example: + * \code + * Point<double> point (1.0, 2.0, 3.0); + * double sqrNrm2 = point[0] * point[0] + point[1] * point[1] + point[2] + point[2]; + * \endcode + */ + const T& operator[] (std::size_t idx) const + { + assert (idx < DIM); + return _x[idx]; + } + /** \brief access operator (see book Effektiv C++ programmieren - subsection 1.3.2 ). + * \sa const T& operator[] (std::size_t idx) const + */ + T& operator[] (std::size_t idx) + { + return const_cast<T&> (static_cast<const TemplatePoint&> (*this)[idx]); + } + + /** returns an array containing the coordinates of the point */ + const T* getCoords () const + { + return _x.data(); + } + + /** write point coordinates into stream (used from operator<<) + * \param os a standard output stream + */ + virtual void write (std::ostream &os) const + { + std::copy(_x.cbegin(), _x.cend(), std::ostream_iterator<T>(os, " ")); + } + + /** read point coordinates into stream (used from operator>>) */ + virtual void read (std::istream &is) + { + std::copy(std::istream_iterator<T>(is), std::istream_iterator<T>(), _x.begin()); + } protected: - std::array<T, DIM> _x; + std::array<T, DIM> _x; }; template <typename T, std::size_t DIM> TemplatePoint<T,DIM>::TemplatePoint() : - _x({{0}}) + _x({{0}}) {} template <typename T, std::size_t DIM> TemplatePoint<T,DIM>::TemplatePoint(std::array<T,DIM> const& x) : - _x(x) + _x(x) {} /** Equality of TemplatePoint's up to an epsilon. @@ -110,28 +110,28 @@ TemplatePoint<T,DIM>::TemplatePoint(std::array<T,DIM> const& x) : template <typename T, std::size_t DIM> bool operator==(TemplatePoint<T,DIM> const& a, TemplatePoint<T,DIM> const& b) { - T const sqr_dist(sqrDist(a,b)); - auto const eps = std::numeric_limits<T>::epsilon(); - return (sqr_dist < eps*eps); + T const sqr_dist(sqrDist(a,b)); + auto const eps = std::numeric_limits<T>::epsilon(); + return (sqr_dist < eps*eps); } template <typename T, std::size_t DIM> bool operator< (TemplatePoint<T,DIM> const& a, TemplatePoint<T,DIM> const& b) { - for (std::size_t i = 0; i < DIM; ++i) - { - if (a[i] > b[i]) { - return false; - } else { - if (a[i] < b[i]) { - return true; - } - } - // continue with next dimension, because a[0] == b[0] - } - - // The values in all dimenisions are equal. - return false; + for (std::size_t i = 0; i < DIM; ++i) + { + if (a[i] > b[i]) { + return false; + } else { + if (a[i] < b[i]) { + return true; + } + } + // continue with next dimension, because a[0] == b[0] + } + + // The values in all dimenisions are equal. + return false; } /** @@ -147,56 +147,56 @@ bool operator< (TemplatePoint<T,DIM> const& a, TemplatePoint<T,DIM> const& b) */ template <typename T, std::size_t DIM> bool lessEq(TemplatePoint<T, DIM> const& a, TemplatePoint<T, DIM> const& b, - double eps = std::numeric_limits<double>::epsilon()) + double eps = std::numeric_limits<double>::epsilon()) { - auto coordinateIsLargerEps = [&eps](T const u, T const v) -> bool - { - return std::fabs(u - v) > eps * std::min(std::fabs(v), std::fabs(u)) && - std::fabs(u - v) > eps; - }; - - for (std::size_t i = 0; i < DIM; ++i) - { - // test a relative and an absolute criterion - if (coordinateIsLargerEps(a[i], b[i])) - { - if (a[i] <= b[i]) - return true; - else - return false; - } - // a[i] ~= b[i] up to an epsilon. Compare next dimension. - } - - // all coordinates are equal up to an epsilon. - return true; + auto coordinateIsLargerEps = [&eps](T const u, T const v) -> bool + { + return std::fabs(u - v) > eps * std::min(std::fabs(v), std::fabs(u)) && + std::fabs(u - v) > eps; + }; + + for (std::size_t i = 0; i < DIM; ++i) + { + // test a relative and an absolute criterion + if (coordinateIsLargerEps(a[i], b[i])) + { + if (a[i] <= b[i]) + return true; + else + return false; + } + // a[i] ~= b[i] up to an epsilon. Compare next dimension. + } + + // all coordinates are equal up to an epsilon. + return true; } /** Distance between points p0 and p1 in the maximum norm. */ template <typename T> T maxNormDist(const MathLib::TemplatePoint<T>* p0, const MathLib::TemplatePoint<T>* p1) { - const T x = fabs((*p1)[0] - (*p0)[0]); - const T y = fabs((*p1)[1] - (*p0)[1]); - const T z = fabs((*p1)[2] - (*p0)[2]); + const T x = fabs((*p1)[0] - (*p0)[0]); + const T y = fabs((*p1)[1] - (*p0)[1]); + const T z = fabs((*p1)[2] - (*p0)[2]); - return std::max(x, std::max(y, z)); + return std::max(x, std::max(y, z)); } /** overload the output operator for class Point */ template <typename T, std::size_t DIM> std::ostream& operator<< (std::ostream &os, const TemplatePoint<T,DIM> &p) { - p.write (os); - return os; + p.write (os); + return os; } /** overload the input operator for class Point */ template <typename T, std::size_t DIM> std::istream& operator>> (std::istream &is, TemplatePoint<T,DIM> &p) { - p.read (is); - return is; + p.read (is); + return is; } } // end namespace MathLib diff --git a/MathLib/TemplateWeightedPoint.h b/MathLib/TemplateWeightedPoint.h index 3d354a180eb..9c2073924a3 100644 --- a/MathLib/TemplateWeightedPoint.h +++ b/MathLib/TemplateWeightedPoint.h @@ -22,17 +22,17 @@ template <typename FP_T, typename W_T, std::size_t DIM> class TemplateWeightedPoint : public TemplatePoint<FP_T, DIM> { public: - TemplateWeightedPoint(std::array<FP_T, DIM> const& x, W_T weight) : - TemplatePoint<FP_T, DIM>(x), _weight(weight) - {} + TemplateWeightedPoint(std::array<FP_T, DIM> const& x, W_T weight) : + TemplatePoint<FP_T, DIM>(x), _weight(weight) + {} - W_T getWeight() const - { - return _weight; - } + W_T getWeight() const + { + return _weight; + } private: - W_T const _weight; + W_T const _weight; }; typedef TemplateWeightedPoint<double, double, 1> WeightedPoint1D; diff --git a/MathLib/Vector3.h b/MathLib/Vector3.h index 7d9f0307fda..75bc03f2c1a 100644 --- a/MathLib/Vector3.h +++ b/MathLib/Vector3.h @@ -32,158 +32,158 @@ template <class T> class TemplateVector3 : public MathLib::TemplatePoint<T> { public: - /** - * Default constructor. All coordinates are set to zero. - */ - TemplateVector3() = default; - - TemplateVector3(T x0, T x1, T x2) - { - this->_x[0] = x0; - this->_x[1] = x1; - this->_x[2] = x2; - } - - /** - * Copy constructor. - */ - TemplateVector3(TemplateVector3<T> const& v) = default; - TemplateVector3<T>& operator=(TemplateVector3<T> const& v) = default; - - /** - * Construct Vector3 from TemplatePoint. - */ - TemplateVector3(TemplatePoint<T,3> const& p) : - TemplatePoint<T>(p) - {} - - /** Constructs the vector \f$v=(b-a)\f$ from the given points, - * which starts in point \f$a\f$ and ends in point \f$b\f$ - */ - TemplateVector3(const MathLib::TemplatePoint<T> &a, const MathLib::TemplatePoint<T> &b) : - MathLib::TemplatePoint<T>() - { - this->_x[0] = b[0] - a[0]; - this->_x[1] = b[1] - a[1]; - this->_x[2] = b[2] - a[2]; - } - - // vector arithmetic - TemplateVector3 operator+(TemplateVector3 const& v) const - { - return TemplateVector3(this->_x[0]+v[0], this->_x[1]+v[1], this->_x[2]+v[2]); - } - - TemplateVector3 operator-(TemplateVector3 const& v) const - { - return TemplateVector3(this->_x[0]-v[0], this->_x[1]-v[1], this->_x[2]-v[2]); - } - - TemplateVector3& operator+=(TemplateVector3 const& v) - { - for (std::size_t i(0); i < 3; i++) this->_x[i] += v[i]; - return *this; - } - - TemplateVector3& operator-=(const TemplateVector3 & pV) - { - for (std::size_t i(0); i < 3; i++) this->_x[i] -= pV[i]; - return *this; - } - - TemplateVector3& operator*=(double s) - { - for (std::size_t i(0); i < 3; i++) - this->_x[i] *= s; - return *this; - } - - /** - * After applying the normalize operator to the vector its length is 1.0. - */ - void normalize() - { - const double s(1/getLength()); - for (std::size_t i(0); i < 3; i++) - this->_x[i] *= s; - } - - /// Returns a normalized version of this vector - TemplateVector3<double> getNormalizedVector() const - { - if (getSqrLength() == 0) - return TemplateVector3<double>(0,0,0); - TemplateVector3<double> norm_vec (this->_x[0], this->_x[1], this->_x[2]); - norm_vec.normalize(); - return norm_vec; - } - - /// Returns the squared length - double getSqrLength(void) const - { - return this->_x[0]*this->_x[0] + this->_x[1]*this->_x[1] + this->_x[2]*this->_x[2]; - } - - /// Returns the length - double getLength(void) const - { - return sqrt(getSqrLength()); - } - - /** scalarProduct, implementation of scalar product, - * sometimes called dot or inner product. - */ - template <typename T1> - friend T1 scalarProduct(TemplateVector3<T1> const& v, TemplateVector3<T1> const& w); - - /** crossProduct: implementation of cross product, - * sometimes called outer product. - */ - template <typename T1> - friend TemplateVector3<T1> crossProduct( - TemplateVector3<T1> const& v, - TemplateVector3<T1> const& w); - - /** multiplication with a scalar s */ - template <typename T1> - friend TemplateVector3<T1> operator*( - TemplateVector3<T1> const& v, - double s); - template <typename T1> - friend TemplateVector3<T1> operator*( - double s, - TemplateVector3<T1> const& v); + /** + * Default constructor. All coordinates are set to zero. + */ + TemplateVector3() = default; + + TemplateVector3(T x0, T x1, T x2) + { + this->_x[0] = x0; + this->_x[1] = x1; + this->_x[2] = x2; + } + + /** + * Copy constructor. + */ + TemplateVector3(TemplateVector3<T> const& v) = default; + TemplateVector3<T>& operator=(TemplateVector3<T> const& v) = default; + + /** + * Construct Vector3 from TemplatePoint. + */ + TemplateVector3(TemplatePoint<T,3> const& p) : + TemplatePoint<T>(p) + {} + + /** Constructs the vector \f$v=(b-a)\f$ from the given points, + * which starts in point \f$a\f$ and ends in point \f$b\f$ + */ + TemplateVector3(const MathLib::TemplatePoint<T> &a, const MathLib::TemplatePoint<T> &b) : + MathLib::TemplatePoint<T>() + { + this->_x[0] = b[0] - a[0]; + this->_x[1] = b[1] - a[1]; + this->_x[2] = b[2] - a[2]; + } + + // vector arithmetic + TemplateVector3 operator+(TemplateVector3 const& v) const + { + return TemplateVector3(this->_x[0]+v[0], this->_x[1]+v[1], this->_x[2]+v[2]); + } + + TemplateVector3 operator-(TemplateVector3 const& v) const + { + return TemplateVector3(this->_x[0]-v[0], this->_x[1]-v[1], this->_x[2]-v[2]); + } + + TemplateVector3& operator+=(TemplateVector3 const& v) + { + for (std::size_t i(0); i < 3; i++) this->_x[i] += v[i]; + return *this; + } + + TemplateVector3& operator-=(const TemplateVector3 & pV) + { + for (std::size_t i(0); i < 3; i++) this->_x[i] -= pV[i]; + return *this; + } + + TemplateVector3& operator*=(double s) + { + for (std::size_t i(0); i < 3; i++) + this->_x[i] *= s; + return *this; + } + + /** + * After applying the normalize operator to the vector its length is 1.0. + */ + void normalize() + { + const double s(1/getLength()); + for (std::size_t i(0); i < 3; i++) + this->_x[i] *= s; + } + + /// Returns a normalized version of this vector + TemplateVector3<double> getNormalizedVector() const + { + if (getSqrLength() == 0) + return TemplateVector3<double>(0,0,0); + TemplateVector3<double> norm_vec (this->_x[0], this->_x[1], this->_x[2]); + norm_vec.normalize(); + return norm_vec; + } + + /// Returns the squared length + double getSqrLength(void) const + { + return this->_x[0]*this->_x[0] + this->_x[1]*this->_x[1] + this->_x[2]*this->_x[2]; + } + + /// Returns the length + double getLength(void) const + { + return sqrt(getSqrLength()); + } + + /** scalarProduct, implementation of scalar product, + * sometimes called dot or inner product. + */ + template <typename T1> + friend T1 scalarProduct(TemplateVector3<T1> const& v, TemplateVector3<T1> const& w); + + /** crossProduct: implementation of cross product, + * sometimes called outer product. + */ + template <typename T1> + friend TemplateVector3<T1> crossProduct( + TemplateVector3<T1> const& v, + TemplateVector3<T1> const& w); + + /** multiplication with a scalar s */ + template <typename T1> + friend TemplateVector3<T1> operator*( + TemplateVector3<T1> const& v, + double s); + template <typename T1> + friend TemplateVector3<T1> operator*( + double s, + TemplateVector3<T1> const& v); }; template <typename T> T scalarProduct(TemplateVector3<T> const& v, TemplateVector3<T> const& w) { - return v._x[0] * w._x[0] + v._x[1] * w._x[1] + v._x[2] * w._x[2]; + return v._x[0] * w._x[0] + v._x[1] * w._x[1] + v._x[2] * w._x[2]; } template <typename T1> TemplateVector3<T1> crossProduct( - TemplateVector3<T1> const& v, - TemplateVector3<T1> const& w) + TemplateVector3<T1> const& v, + TemplateVector3<T1> const& w) { - return TemplateVector3<T1>( - v._x[1] * w._x[2] - v._x[2] * w._x[1], - v._x[2] * w._x[0] - v._x[0] * w._x[2], - v._x[0] * w._x[1] - v._x[1] * w._x[0]); + return TemplateVector3<T1>( + v._x[1] * w._x[2] - v._x[2] * w._x[1], + v._x[2] * w._x[0] - v._x[0] * w._x[2], + v._x[0] * w._x[1] - v._x[1] * w._x[0]); } template <typename T1> TemplateVector3<T1> operator*( - TemplateVector3<T1> const& v, - double s) + TemplateVector3<T1> const& v, + double s) { - return TemplateVector3<T1>(v[0] * s, v[1] * s, v[2] * s); + return TemplateVector3<T1>(v[0] * s, v[1] * s, v[2] * s); } template <typename T1> TemplateVector3<T1> operator*( - double s, - TemplateVector3<T1> const& v) + double s, + TemplateVector3<T1> const& v) { - return v * s; + return v * s; } typedef TemplateVector3<double> Vector3; diff --git a/MathLib/vector_io.h b/MathLib/vector_io.h index 22e70b4e632..bff8f54d95d 100644 --- a/MathLib/vector_io.h +++ b/MathLib/vector_io.h @@ -31,77 +31,77 @@ typedef std::string unistring; template<typename A, typename T> inline const A lexical_cast(const T& source) { - unistringstream s; + unistringstream s; - s << source; + s << source; - A destination; - s >> destination; + A destination; + s >> destination; - return (destination); + return (destination); } /** reads the number of lines of non-binary stream */ unsigned readLines ( std::istream &in ) { - unsigned k = 0; - while (!in.eof()) { - std::string str; - getline (in, str); - k++; - } + unsigned k = 0; + while (!in.eof()) { + std::string str; + getline (in, str); + k++; + } - return k; + return k; } /** reads N values of non-binary stream */ template <class T> void read ( std::istream &in, unsigned N, T *a ) { - unsigned k = 0; - std::string ws (" \t"); + unsigned k = 0; + std::string ws (" \t"); - while (!in.eof () and k <= N) { - std::string t; - getline (in, t); - std::size_t i1; - if (t.length () != 0) { - i1 = t.find_first_not_of (ws, 0); - if (i1 != std::string::npos) { - std::size_t i2 = t.find_first_of (ws, i1); - if (i2 != std::string::npos) { - a[k++] = lexical_cast<T> ( t.substr(i1, i2-i1) ); - } else { - a[k++] = lexical_cast<T> ( t.substr(i1, t.size()-i1)); - } - } - } - } + while (!in.eof () and k <= N) { + std::string t; + getline (in, t); + std::size_t i1; + if (t.length () != 0) { + i1 = t.find_first_not_of (ws, 0); + if (i1 != std::string::npos) { + std::size_t i2 = t.find_first_of (ws, i1); + if (i2 != std::string::npos) { + a[k++] = lexical_cast<T> ( t.substr(i1, i2-i1) ); + } else { + a[k++] = lexical_cast<T> ( t.substr(i1, t.size()-i1)); + } + } + } + } } template <class T> int readArrays ( const std::string &fname, std::size_t &s, - std::size_t n, T* &arr ) + std::size_t n, T* &arr ) { - // open stream - std::ifstream in (fname.c_str()); - // read number of rows - if (in) { - s = readLines(in)-1; - in.close(); - } else { - std::cout << "could not open " << fname << std::endl; - return 1; - } - if (s > 0) { - arr = new T[s * n]; - std::size_t j(0), l(0); - // read values - in.open (fname.c_str(), std::ios::in); - for (j=0; j<s; ++j) { - for (l=0; l<n; l++) in >> arr[l*s+j]; - } - in.close (); - } - return 0; + // open stream + std::ifstream in (fname.c_str()); + // read number of rows + if (in) { + s = readLines(in)-1; + in.close(); + } else { + std::cout << "could not open " << fname << std::endl; + return 1; + } + if (s > 0) { + arr = new T[s * n]; + std::size_t j(0), l(0); + // read values + in.open (fname.c_str(), std::ios::in); + for (j=0; j<s; ++j) { + for (l=0; l<n; l++) in >> arr[l*s+j]; + } + in.close (); + } + return 0; } #endif -- GitLab