From 9dd4b1897601bfccb71744960f464131dc2e7520 Mon Sep 17 00:00:00 2001 From: jiri Date: Wed, 7 Nov 2018 18:21:59 +0000 Subject: [PATCH] *** empty log message *** --- ChangeLog | 9 ++ mat.cc | 245 +++++++++++++++++++++++++++++++++++++++++++++++++++--- mat.h | 10 ++- 3 files changed, 251 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index a559cf5..3355ecc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +07.11.2018 NRMat nonsymmetry and nonhermiticity +03.11.2018 bugfix in bitvector class +25.07.2018 minor bugfix and improvement in davidson.h +12.07.2018 bugfix in fourindex_dense::operator() +11.07.2018 added new symmetry type to fourindex.h +06.11.2017 smat.cc fixed complex lvalue problem for g++ 6.4.0 +25.10.2017 bitvector improved input from stream +27.09.2017 DIIS extended to optionally compute extrapolated error vector +08.11.2016 implemented swap_rows(i,j) and swap_cols(i,j) for NRMat 28.06.2016 minor changes in Makefile.am to work smoothly with recent autotools 28.06.2016 fixed determinant sign issue due to dgesv ipiv counting from 1 in nonclass.cc 11.11.2015 autotool files changed to support separate only-static ATLAS libraries diff --git a/mat.cc b/mat.cc index 62a9f5e..b7fdfa9 100644 --- a/mat.cc +++ b/mat.cc @@ -828,7 +828,7 @@ NRMat& NRMat::transposeme(const int _n) { } #ifdef CUDALA }else{ - +laerror("transposeme not implemented on GPU yet"); } #endif @@ -836,6 +836,108 @@ NRMat& NRMat::transposeme(const int _n) { } +/***************************************************************************//** + * compute matrix non-symmetry + ******************************************************************************/ +template +const typename LA_traits::normtype NRMat::nonsymmetry() const { +#ifdef DEBUG + if (nn != mm) laerror("NRMat:nonsymmetry() invalid for non-square matrix"); +#endif +typename LA_traits::normtype sum = 0; +#ifdef CUDALA + if(location == cpu){ +#endif + for(register int i=1; i +const double NRMat >::nonhermiticity() const { +#ifdef DEBUG + if (nn != mm) laerror("NRMat:nonsymmetry() invalid for non-square matrix"); +#endif +double sum = 0; +complex tmp; +#ifdef CUDALA + if(location == cpu){ +#endif + for(register int i=1; i (v[i][j].real()-v[j][i].real(),v[i][j].imag()+v[j][i].imag()); + #else + register int a, b; + a = i*(size_t)mm + j; + b = j*(size_t)mm + i; + tmp = complex (v[a].real() - v[b].real(), v[a].imag()+v[b].imag()); + #endif + sum += tmp.real()*tmp.real()+tmp.imag()*tmp.imag(); + } + } +#ifdef CUDALA + }else{ +laerror("nonsymmetry not implemented on GPU yet"); + + } +#endif + return sum; +} + +template <> +const double NRMat >::nonsymmetry() const { +#ifdef DEBUG + if (nn != mm) laerror("NRMat:nonsymmetry() invalid for non-square matrix"); +#endif +double sum = 0; +complex tmp; +#ifdef CUDALA + if(location == cpu){ +#endif + for(register int i=1; i& NRMat::conjugateme() { } /***************************************************************************//** - * conjugate this complex matrix \f$A\f$, i.e. do nothing :-) + * conjugate this complex matrix \f$A\f$, or leading minor of size n * @return reference to the modified matrix ******************************************************************************/ template<> @@ -2337,9 +2439,9 @@ const double* NRMat::diagonalof(NRVec &r, const bool divide, boo ******************************************************************************/ template<> void NRMat::diagonalset(const NRVec &r) { +int nnmin= nn<=mm?nn:mm; #ifdef DEBUG - if(r.size() != nn) laerror("incompatible vectors int NRMat::diagonalset(...)"); - if(nn != mm) laerror("NRMat::diagonalset(...) can be used only for square matrices"); + if(r.size() != nnmin) laerror("incompatible vectors int NRMat::diagonalset(...)"); #endif SAME_LOC(*this, r); @@ -2350,14 +2452,14 @@ void NRMat::diagonalset(const NRVec &r) { #endif #ifdef MATPTR - for (int i=0; i::diagonalset(const NRVec &r) { ******************************************************************************/ template<> void NRMat >::diagonalset(const NRVec > &r) { +int nnmin= nn<=mm?nn:mm; #ifdef DEBUG - if(r.size() != nn) laerror("incompatible vectors int NRMat >::diagonalset(...)"); - if(nn != mm) laerror("NRMat >::diagonalset(...) can be used only for square matrices"); + if(r.size() != nnmin) laerror("incompatible vectors int NRMat >::diagonalset(...)"); #endif SAME_LOC(*this, r); copyonwrite(); @@ -2380,13 +2482,13 @@ void NRMat >::diagonalset(const NRVec > &r) { if(location == cpu){ #endif #ifdef MATPTR - for (int i=0; iv), 1); + cublasZcopy(nnmin, (cuDoubleComplex*)(r.v), 1, (cuDoubleComplex*)(this->v), mm+1); } #endif } @@ -2559,6 +2661,23 @@ NRMat& NRMat::swap_rows(){ return *this; } +template<> +NRMat& NRMat::swap_rows(const int i, const int j){ + copyonwrite(); + +#ifdef CUDALA + if(location == cpu){ +#endif + cblas_dswap(mm, (*this)[i], 1, (*this)[j], 1); +#ifdef CUDALA + }else{ + cublasDswap(mm, v + i*(size_t)mm, 1, v + j*mm, 1); + TEST_CUBLAS("cublasDswap"); + } +#endif + return *this; +} + /***************************************************************************//** * interchange the order of the rows of the current (complex) matrix * @return reference to the modified matrix @@ -2585,6 +2704,23 @@ NRMat >& NRMat >::swap_rows(){ return *this; } +template<> +NRMat >& NRMat >::swap_rows(const int i, const int j){ + copyonwrite(); + +#ifdef CUDALA + if(location == cpu){ +#endif + cblas_zswap(mm, (*this)[i], 1, (*this)[j], 1); +#ifdef CUDALA + }else{ + cublasZswap(mm, (cuDoubleComplex*)(v + i*(size_t)mm), 1, (cuDoubleComplex*)(v + j*(size_t)mm), 1); + TEST_CUBLAS("cublasZswap"); + } +#endif + return *this; +} + /***************************************************************************//** * interchange the order of the rows of the current general matrix of type T * for GPU computations, the condition sizeof(T)%sizeof(float) is required @@ -2643,6 +2779,24 @@ NRMat& NRMat::swap_cols(){ #endif return *this; } + +template<> +NRMat& NRMat::swap_cols(const int i, const int j){ + copyonwrite(); + +#ifdef CUDALA + if(location == cpu){ +#endif + cblas_dswap(nn, &((*this)(0, i)), mm, &((*this)(0, j)), mm); +#ifdef CUDALA + }else{ + cublasDswap(nn, v + i, mm, v + j, mm); + TEST_CUBLAS("cublasDswap"); + } +#endif + return *this; +} + /***************************************************************************//** * interchange the order of the columns of the current (complex) matrix * @return reference to the modified matrix @@ -2669,6 +2823,23 @@ NRMat >& NRMat >::swap_cols(){ return *this; } +template<> +NRMat >& NRMat >::swap_cols(const int i, const int j){ + copyonwrite(); + +#ifdef CUDALA + if(location == cpu){ +#endif + cblas_zswap(nn, &((*this)(0, i)), mm, &((*this)(0, j)), mm); +#ifdef CUDALA + }else{ + cublasZswap(nn, (cuDoubleComplex*)(v + i), mm, (cuDoubleComplex*)(v + j), mm); + TEST_CUBLAS("cublasZswap"); + } +#endif + return *this; +} + /***************************************************************************//** * interchange the order of the columns of the current general matrix of type T * because of the cuBlas implementation, the GPU version requires that @@ -2704,6 +2875,56 @@ NRMat& NRMat::swap_cols(){ return *this; } +/*interchange two columns*/ +template +NRMat& NRMat::swap_cols(const int a, const int b){ + T tmp; + copyonwrite(); +#ifdef CUDALA + if(location == cpu){ +#endif + for(register int j=0;j::swap_cols"); + cublasSswap(nn*sizeof(T)/sizeof(float), + (float *)(v + a), mm*sizeof(T)/sizeof(float), + (float *)(v + b), mm*sizeof(T)/sizeof(float) ); + TEST_CUBLAS("cublasSswap"); + } +#endif + return *this; +} + +/*interchange two rows*/ +template +NRMat& NRMat::swap_rows(const int a, const int b){ + T tmp; + copyonwrite(); +#ifdef CUDALA + if(location == cpu){ +#endif + for(register int j=0;j::swap_rows"); + cublasSswap(nn*sizeof(T)/sizeof(float), + (float *)(v + a*mm), sizeof(T)/sizeof(float), + (float *)(v + b*mm), sizeof(T)/sizeof(float) ); + TEST_CUBLAS("cublasSswap"); + } +#endif + return *this; +} + /***************************************************************************//** * interchange the order of the rows and columns of the current * real matrix \f$A\f$ of type T, i.e. perform the operation diff --git a/mat.h b/mat.h index f950ea6..8a997a4 100644 --- a/mat.h +++ b/mat.h @@ -330,6 +330,11 @@ public: //! swap the order of the rows and columns of the current matrix NRMat & swap_rows_cols(); + // LV - swapping of columns i and j + NRMat & swap_cols(const int i, const int j); + // LV - swapping of rows i and j + NRMat & swap_rows(const int i, const int j); + //! multiply by sparse matrix SparseSMat operator*(const SparseSMat &rhs) const; @@ -351,6 +356,9 @@ public: inline void simplify() {}; bool issymmetric() const { return 0; }; + const typename LA_traits::normtype nonsymmetry() const; + const typename LA_traits::normtype nonhermiticity() const; + #ifndef NO_STRASSEN //! Strassen's multiplication (better than \f$\mathacal{O}(n^3)\f$, analogous syntax to \see NRMat::gemm() ) void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha); @@ -579,7 +587,7 @@ template NRMat::NRMat(const NRVec &rhs, const int n, const int m, const int offset) { if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length"); - + if(offset!=0) std::cout << "dangerous: if the underlying vector is deallocated before the matrix, wrong delete[] will result for nonzero offset!!!\n"; #ifdef CUDALA location=rhs.location; #endif