diff --git a/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h index f9161a3f944b34c354039d11ecb1ffbd1ac0d6c4..eb19ea7ff883fb213fce01a9da6aa3cb232e399b 100644 --- a/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h +++ b/MathLib/LinAlg/Sparse/CRSMatrixOpenMP.h @@ -38,7 +38,7 @@ public: virtual ~CRSMatrixOpenMP() {} - virtual void amux(FP_TYPE d, FP_TYPE const * const x, FP_TYPE *y) const + virtual void amux(FP_TYPE const d, FP_TYPE const * const x, FP_TYPE *y) const { amuxCRSParallelOpenMP(d, MatrixBase::_n_rows, CRSMatrix<FP_TYPE,IDX_TYPE>::_row_ptr, CRSMatrix<FP_TYPE,IDX_TYPE>::_col_idx, CRSMatrix<FP_TYPE,IDX_TYPE>::_data, x, y); } diff --git a/MathLib/LinAlg/Sparse/amuxCRS.cpp b/MathLib/LinAlg/Sparse/amuxCRS.cpp index b73aeb160e7198257ad43a1bdf8c0d8aeda6cf41..178311d3a882e2782d51a4da737899b264f79f5d 100644 --- a/MathLib/LinAlg/Sparse/amuxCRS.cpp +++ b/MathLib/LinAlg/Sparse/amuxCRS.cpp @@ -110,6 +110,8 @@ void amuxCRSParallelPThreads (double a, double const * const A, double const * const x, double* y, unsigned num_of_pthreads, unsigned const*const workload_intervals) { + (void) n; // Unused if HAVE_PTHREADS is not defined. + #ifdef HAVE_PTHREADS // fill thread data objects MatMultThreadParam** thread_param_array (new MatMultThreadParam*[num_of_pthreads]); diff --git a/MathLib/LinAlg/Sparse/amuxCRS.h b/MathLib/LinAlg/Sparse/amuxCRS.h index 721f24bec53ae7ca4268ad6bf5a4dc73c5531380..0adf588862ab3c488f0aff28ce5fde6645ed0445 100644 --- a/MathLib/LinAlg/Sparse/amuxCRS.h +++ b/MathLib/LinAlg/Sparse/amuxCRS.h @@ -41,20 +41,23 @@ void amuxCRSParallelPThreads (double a, #ifdef _OPENMP template<typename FP_TYPE, typename IDX_TYPE> -void amuxCRSParallelOpenMP (FP_TYPE a, - unsigned n, IDX_TYPE const * const __restrict__ iA, IDX_TYPE const * const __restrict__ jA, - FP_TYPE const * const A, FP_TYPE const * const __restrict__ x, FP_TYPE* __restrict__ y) +void amuxCRSParallelOpenMP (FP_TYPE a, unsigned n, + IDX_TYPE const * const __restrict__ iA, + IDX_TYPE const * const __restrict__ jA, FP_TYPE const * const A, + FP_TYPE const * const __restrict__ x, FP_TYPE* __restrict__ y) { OPENMP_LOOP_TYPE i; + IDX_TYPE j; + FP_TYPE t; { -#pragma omp parallel for +#pragma omp parallel for private(i, j, t) for (i = 0; i < n; i++) { const IDX_TYPE end(iA[i + 1]); - y[i] = A[iA[i]] * x[jA[iA[i]]]; - for (IDX_TYPE j(iA[i]+1); j < end; j++) { - y[i] += A[j] * x[jA[j]]; + t = A[iA[i]] * x[jA[iA[i]]]; + for (j = iA[i]+1; j < end; j++) { + t += A[j] * x[jA[j]]; } - y[i] *= a; + y[i] = t * a; } } } diff --git a/SimpleTests/MatrixTests/MatVecMultNDPerm.cpp b/SimpleTests/MatrixTests/MatVecMultNDPerm.cpp index 7390673c7148d64cfce0a79ed501227fd15bb773..00b32c5e4925100bc9ca1b0897cc7b389a8bf8ef 100644 --- a/SimpleTests/MatrixTests/MatVecMultNDPerm.cpp +++ b/SimpleTests/MatrixTests/MatVecMultNDPerm.cpp @@ -72,7 +72,7 @@ int main(int argc, char *argv[]) TCLAP::ValueArg<std::string> output_arg("o", "output", "output file", false, "", "string"); cmd.add( output_arg ); - TCLAP::ValueArg<unsigned> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); + TCLAP::ValueArg<bool> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); cmd.add( verbosity_arg ); cmd.parse( argc, argv ); diff --git a/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp b/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp index 0077a6279f055f62fd8f49aad496aac96b3bf89c..67bb8a91b6891f6af9599c32c5d08bd7be4c0579 100644 --- a/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp +++ b/SimpleTests/MatrixTests/MatVecMultNDPermOpenMP.cpp @@ -75,7 +75,7 @@ int main(int argc, char *argv[]) TCLAP::ValueArg<std::string> output_arg("o", "output", "output file", false, "", "string"); cmd.add( output_arg ); - TCLAP::ValueArg<unsigned> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); + TCLAP::ValueArg<bool> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); cmd.add( verbosity_arg ); cmd.parse( argc, argv ); diff --git a/SimpleTests/MatrixTests/MatVecMultPthreads.cpp b/SimpleTests/MatrixTests/MatVecMultPthreads.cpp index dfafb120de0b572623679ae5ed09e8766b37fd4e..6c7a09a11e4383551e64b0101f6e5124599d075e 100644 --- a/SimpleTests/MatrixTests/MatVecMultPthreads.cpp +++ b/SimpleTests/MatrixTests/MatVecMultPthreads.cpp @@ -76,7 +76,7 @@ int main(int argc, char *argv[]) TCLAP::ValueArg<std::string> output_arg("o", "output", "output file", false, "", "string"); cmd.add( output_arg ); - TCLAP::ValueArg<unsigned> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); + TCLAP::ValueArg<bool> verbosity_arg("v", "verbose", "level of verbosity [0 very low information, 1 much information]", false, 0, "string"); cmd.add( verbosity_arg ); cmd.parse( argc, argv ); diff --git a/scripts/cmake/CompilerSetup.cmake b/scripts/cmake/CompilerSetup.cmake index ebf18fd767640e1b7e0104e4bf4e011ee4f55b92..4c9c002e47e947ba8877ed27befe49a2cfc6a4ae 100644 --- a/scripts/cmake/CompilerSetup.cmake +++ b/scripts/cmake/CompilerSetup.cmake @@ -44,7 +44,7 @@ IF (COMPILER_IS_INTEL) MESSAGE(STATUS "Set Intel release flags") SET(CMAKE_CXX_FLAGS "-O3 -DNDEBUG") ENDIF() - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated -Wall") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHOST -O3 -no-prec-div -static -DNDEBUG") ENDIF() # COMPILER_IS_INTEL # Profiling