//------------------------------------------------------------------------------ /* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */ //------------------------------------------------------------------------------ /* LA: linear algebra C++ interface library Copyright (C) 2008 Jiri Pittner or complex versions written by Roman Curik This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef _LA_SMAT_H_ #define _LA_SMAT_H_ #include "la_traits.h" namespace LA { #define NN2 ((size_t)nn*(nn+1)/2) //forward declaration template class NRPerm; template class NRSMat_from1; /***************************************************************************//** * This class implements a general symmetric or hermitian matrix the elements * of which are stored in packed form. Particularly the lower triangular part * of a symmetric or hermitian matrix of order \f$N\f$ is interpreted as a * vector of length \f$N(N+1)/2\f$ in row-major storage scheme. ******************************************************************************/ template class NRSMat{ protected: int nn;//!< number of rows/columns of this symmetric matrix T *v;//!< internal pointer to the underlying data structure int *count;//!< pointer to the reference counter #ifdef CUDALA GPUID location;//!< specification of memory (GPU/CPU) location where this objects resides #endif public: friend class NRVec; friend class NRMat; ~NRSMat(); //! default constructor of null-matrix inline NRSMat() : nn(0),v(0),count(0) { #ifdef CUDALA location = DEFAULT_LOC; #endif }; //! default constructor inline explicit NRSMat(const int n, const GPUID loc = undefined); //! constructor initializing the matrix being created by given scalar value inline NRSMat(const T &a, const int n); //! constructor initializing the matrix being created by data located at given memory position inline NRSMat(const T *a, const int n); //! copy constructor inline NRSMat(const NRSMat &rhs); //! constructor converting real matrix to its complex counterpart NRSMat(const typename LA_traits_complex::NRSMat_Noncomplex_type &rhs, bool imagpart = false); //! constructor creating symmetric part of a general matrix explicit NRSMat(const NRMat &rhs); //! construct symmetric matrix by filling the lower triangle with data stored in a vector explicit NRSMat(const NRVec &rhs, const int n); //! assignment operator performing shallow copy NRSMat & operator=(const NRSMat &rhs); //! assignment operator performing deep copy NRSMat & operator|=(const NRSMat &rhs); //! fill the matrix with pseudorandom numbers (uniform distribution) void randomize(const typename LA_traits::normtype &x); //! assign scalar value to diagonal elements NRSMat & operator=(const T &a); //! unit matrix for general power void identity() {*this = (T)1;} //! inverse matrix NRSMat inverse(); //! permute matrix elements const NRSMat permuted(const NRPerm &p, const bool inverse=false) const; inline int getcount() const {return count?*count:0;} #ifdef CUDALA inline GPUID getlocation() const {return location;} void moveto(const GPUID dest); #else inline GPUID getlocation() const {return cpu;} void moveto(const GPUID dest) {}; #endif //! relational operator for testing nonequality const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits::gencmp(v,rhs.v,NN2);}; //! relational operator for testing equality const bool operator==(const NRSMat &rhs) const {return !(*this != rhs);}; inline NRSMat & operator*=(const T &a); inline NRSMat & operator+=(const T &a); inline NRSMat & operator-=(const T &a); inline NRSMat & operator+=(const NRSMat &rhs); inline NRSMat & operator-=(const NRSMat &rhs); const NRSMat operator-() const; inline const NRSMat operator*(const T &a) const; inline const NRSMat operator+(const T &a) const; inline const NRSMat operator-(const T &a) const; inline const NRSMat operator+(const NRSMat &rhs) const; inline const NRSMat operator-(const NRSMat &rhs) const; inline const NRMat operator+(const NRMat &rhs) const; inline const NRMat operator-(const NRMat &rhs) const; const NRMat operator*(const NRSMat &rhs) const; const NRMat operator*(const NRMat &rhs) const; const T dot(const NRSMat &rhs) const; const T dot(const NRVec &rhs) const; const NRVec operator*(const NRVec &rhs) const {NRVec result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;}; const NRVec > operator*(const NRVec > &rhs) const {NRVec > result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;}; const T* diagonalof(NRVec &, const bool divide = 0, bool cache = false) const; void gemv(const T beta, NRVec &r, const char trans, const T alpha, const NRVec &x) const {r.gemv(beta,*this,trans,alpha,x);}; void gemv(const T beta, NRVec > &r, const char trans, const T alpha, const NRVec > &x) const {r.gemv(beta,*this,trans,alpha,x);}; inline const T& operator[](const size_t ij) const; inline T& operator[](const size_t ij); inline const T& operator()(const int i, const int j) const; inline T& operator()(const int i, const int j); inline int nrows() const; inline int ncols() const; inline size_t size() const; //NOTE it is the size of data n*(n+1)/2, not nrows inline bool transp(const int i, const int j) const {return i>j;} //this can be used for compact storage of matrices, which are actually not symmetric, but one triangle of them is redundant const typename LA_traits::normtype norm(const T scalar = (T)0) const; void axpy(const T alpha, const NRSMat &x); // this+= a*x inline const T amax() const; inline const T amin() const; const T trace() const; void get(int fd, bool dimensions = 1, bool transp = 0); void put(int fd, bool dimensions = 1, bool transp = 0) const; void copyonwrite(bool detachonly=false, bool deep=true); void clear() {copyonwrite(LA_traits::is_plaindata()); LA_traits::clear(v,NN2);}; //zero out void resize(const int n); void dealloc(void) {resize(0);} //! extract specified submatrix const NRSMat submatrix(const int from, const int to) const; const NRSMat submatrix(const NRVec &selection) const; //! store given matrix at given position into the current matrix void storesubmatrix(const int from, const NRSMat &rhs); void storesubmatrix(const NRVec &selection, const NRSMat &rhs); inline operator T*(); inline operator const T*() const; void fprintf(FILE *f, const char *format, const int modulo) const; void fscanf(FILE *f, const char *format); //members concerning sparse matrix explicit NRSMat(const SparseMat &rhs); // dense from sparse explicit NRSMat(const SparseSMat &rhs); // dense from sparse inline void simplify() {}; //just for compatibility with sparse ones bool issymmetric() const {return 1;} void swap(NRSMat &rhs) //more efficient swap than via tmp and constructors and operator= { int tmpnn=nn; nn=rhs.nn; rhs.nn=tmpnn; T *tmpv=v; v=rhs.v; rhs.v=tmpv; int *tmpcount=count; count=rhs.count; rhs.count=tmpcount; #ifdef CUDALA GPUID tmplocation=location; location=rhs.location; rhs.location=tmplocation; #endif } NRSMat::normtype> diffabs(const NRSMat &rhs) const; //difference of absolute values NRSMat::normtype> abs() const; //elementwise absolute values }; }//namespace //due to mutual includes this has to be after full class declaration #include "vec.h" #include "mat.h" #include "permutation.h" namespace LA { /***************************************************************************//** * constructor of a symmetric matrix stored in packed form * @param[in] n number of rows of the matrix being created * @param[in] loc location for storing the matrix * @see count, v, location ******************************************************************************/ template inline NRSMat::NRSMat(const int n, const GPUID loc): nn(n), count(new int(1)) { #ifdef CUDALA location = (loc == undefined?DEFAULT_LOC:loc); if(location == cpu){ #endif v = new T[NN2]; #ifdef CUDALA }else{ v = (T*) gpualloc(NN2*sizeof(T)); } #endif } /***************************************************************************//** * constructor of a symmetric matrix stored in packed form (default location in used) * @param[in] a set all matrix elements equal to this value * @param[in] n number of rows of the matrix being created * @see count, v, location, NRSMat::NRSMat(const int, const GPUID) ******************************************************************************/ template inline NRSMat::NRSMat(const T& a, const int n) : nn(n), count(new int(1)) { #ifdef CUDALA location = DEFAULT_LOC; if(location == cpu){ #endif v = new T[NN2]; if(!LA_traits::is_plaindata() || a != (T)0) for(register size_t i = 0; i::is_plaindata()) laerror("only implemented for plain data"); cublasSetVector(NN2, sizeof(T), &a, 0, v, 1); } #endif } /***************************************************************************//** * constructor of a symmetric matrix stored in packed form (default location in used) * @param[in] a pointer to data of type T used for matrix inicialization * @param[in] n number of rows of the matrix being created * @see count, v, location, NRSMat::NRSMat(const int, const GPUID), NRSMat::NRSMat(const T&, const int) ******************************************************************************/ template inline NRSMat::NRSMat(const T *a, const int n) : nn(n), count(new int(1)) { #ifdef CUDALA location = DEFAULT_LOC; if(location == cpu){ #endif if(LA_traits::is_plaindata()) memcpy(v, a, NN2*sizeof(T)); else for( int i=0; i::is_plaindata()) laerror("only implemented for plain data"); cublasSetVector(NN2, sizeof(T), a, 1, v, 1); } #endif } /***************************************************************************//** * copy constructor implementing shallow copy * @param[in] rhs reference matrix being copied * @see count, v, location ******************************************************************************/ template inline NRSMat::NRSMat(const NRSMat &rhs) { #ifdef CUDALA location = rhs.location; #endif v = rhs.v; nn = rhs.nn; count = rhs.count; if(count) (*count)++; } /***************************************************************************//** * constructor interpreting a vector of \f$n(n+1)/2\f$ elements as a symmetric * matrix stored in packed form having \f$n\f$ rows * @param[in] rhs reference matrix being copied * @param[in] n count of rows of the matrix being created ******************************************************************************/ template NRSMat::NRSMat(const NRVec &rhs, const int n) { #ifdef CUDALA location = rhs.location; #endif nn = n; #ifdef DEBUG if(NN2 != rhs.size()){ laerror("incompatible dimensions in NRSMat::NRSMat(const NRVec&, const int)"); } #endif count = rhs.count; v = rhs.v; (*count)++; } /***************************************************************************//** * multiply this real symmetric matrix with real scalar value * @param[in] a real multiplicative factor * @return reference to the modified matrix ******************************************************************************/ template<> inline NRSMat & NRSMat::operator*=(const double &a) { copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_dscal(NN2, a, v, 1); #ifdef CUDALA }else{ cublasDscal(NN2, a, v, 1); TEST_CUBLAS("cublasDscal");//"NRSMat& NRSMat::operator*=(const double &)" } #endif return *this; } /***************************************************************************//** * multiply this complex symmetric matrix with complex scalar value * @param[in] a complex multiplicative factor * @return reference to the modified matrix ******************************************************************************/ template<> inline NRSMat > & NRSMat >::operator*=(const std::complex &a) { copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_zscal(NN2, &a, v, 1); #ifdef CUDALA }else{ const cuDoubleComplex _a = make_cuDoubleComplex(a.real(), a.imag()); cublasZscal(NN2, _a, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasZscal");//"NRSMat >& NRSMat >::operator*=(const std::complex &)" } #endif return *this; } /***************************************************************************//** * multiply this symmetric matrix of general type T stored in packed form * with scalar value of type T * @param[in] a multiplicative factor of type T * @return reference to the modified matrix ******************************************************************************/ template inline NRSMat & NRSMat::operator*=(const T &a) { NOT_GPU(*this); copyonwrite(); for(register size_t i = 0; iT to the diagonal elements of this symmetric matrix of type T * @param[in] a scalar value \f$\alpha\f$ * @return reference to the modified matrix ******************************************************************************/ template inline NRSMat & NRSMat::operator+=(const T &a) { NOT_GPU(*this); copyonwrite(); for(register int i = 0; iT from the * diagonal elements of this symmetric matrix of type T * @param[in] a scalar value \f$\alpha\f$ * @return reference to the modified matrix ******************************************************************************/ template inline NRSMat & NRSMat::operator-=(const T &a) { NOT_GPU(*this); copyonwrite(); for(register int i = 0; i NRSMat::normtype> NRSMat::diffabs(const NRSMat &rhs) const { #ifdef DEBUG if (nn != rhs.nn) laerror("incompatible dimensions"); #endif NOT_GPU(*this); NOT_GPU(rhs); NRSMat::normtype> r(nn); for(int i=0; i NRSMat::normtype> NRSMat::abs() const { NOT_GPU(*this); NRSMat::normtype> r(nn); for(int i=0; i inline NRSMat& NRSMat::operator+=(const NRSMat & rhs) { #ifdef DEBUG if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat& NRSMat::operator+=(const NRSMat &)"); #endif SAME_LOC(*this, rhs); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_daxpy(NN2, 1.0, rhs.v, 1, v, 1); #ifdef CUDALA }else{ cublasDaxpy(NN2, 1.0, rhs.v, 1, v, 1); TEST_CUBLAS("cublasDaxpy");//" NRSMat& NRSMat::operator+=(const NRSMat &)" } #endif return *this; } /***************************************************************************//** * add up this complex symmetric matrix with given symmetric matrix * @param[in] rhs complex symmetric matrix to be added * @return reference to the modified matrix ******************************************************************************/ template<> inline NRSMat >& NRSMat >::operator+=(const NRSMat > & rhs) { #ifdef DEBUG if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat >& NRSMat >::operator+=(const NRSMat > &)"); #endif SAME_LOC(*this, rhs); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_zaxpy(NN2, &CONE, rhs.v, 1, v, 1); #ifdef CUDALA }else{ cublasZaxpy(NN2, CUONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasZaxpy");//"NRSMat >& NRSMat >::operator+=(const NRSMat > &)" } #endif return *this; } /***************************************************************************//** * add up this symmetric matrix of general type T with given symmetric matrix * @param[in] rhs complex matrix of general type T to be added * @return reference to the modified matrix ******************************************************************************/ template inline NRSMat& NRSMat::operator+=(const NRSMat& rhs) { #ifdef DEBUG if(nn != rhs.nn) laerror("incompatible NRSMat& NRSMat::operator+=(const NRSMat &)"); #endif NOT_GPU(*this); SAME_LOC(*this, rhs); copyonwrite(); for(register size_t i = 0; i inline NRSMat& NRSMat::operator-=(const NRSMat& rhs) { #ifdef DEBUG if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat& NRSMat::operator-=(const NRSMat &)"); #endif SAME_LOC(*this, rhs); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_daxpy(NN2, -1.0, rhs.v, 1, v, 1); #ifdef CUDALA }else{ cublasDaxpy(NN2, -1.0, rhs.v, 1, v, 1); TEST_CUBLAS("cublasDaxpy");//" NRSMat& NRSMat::operator-=(const NRSMat &)" } #endif return *this; } /***************************************************************************//** * subtracts given complex symmetric matrix from this complex symmetric matrix * @param[in] rhs complex symmetric matrix to be subtracted * @return reference to the modified matrix ******************************************************************************/ template<> inline NRSMat >& NRSMat >::operator-=(const NRSMat >& rhs) { #ifdef DEBUG if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat >& NRSMat >::operator-=(const NRSMat > &)"); #endif SAME_LOC(*this, rhs); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_zaxpy(NN2, &CMONE, rhs.v, 1, v, 1); #ifdef CUDALA }else{ cublasZaxpy(NN2, CUMONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasZaxpy");//"NRSMat >& NRSMat >::operator-=(const NRSMat > &)" } #endif return *this; } /***************************************************************************//** * subtracts given symmetric matrix of general type T from this symmetric matrix of type T * @param[in] rhs symmetric matrix of general type T to be subtracted * @return reference to the modified matrix ******************************************************************************/ template inline NRSMat& NRSMat::operator-=(const NRSMat& rhs) { #ifdef DEBUG if(nn != rhs.nn) laerror("incompatible NRSMat& NRSMat::operator-=(const NRSMat &)"); #endif NOT_GPU(*this); copyonwrite(); for(register size_t i = 0; iT with this symmetric matrix of type T * @param[in] rhs dense matrix of type T to be added * @return reference to the modified matrix ******************************************************************************/ template inline const NRMat NRSMat::operator+(const NRMat &rhs) const { return NRMat(rhs) += *this; } /***************************************************************************//** * subtracts given dense matrix of general type T from this symmetric matrix of type T * @param[in] rhs dense matrix of type T to be added * @return reference to the modified matrix ******************************************************************************/ template inline const NRMat NRSMat::operator-(const NRMat &rhs) const { return NRMat(-rhs) += *this; } /***************************************************************************//** * determine matrix element of this symmetric matrix of general type T * using cumulative index increasing in a row-major way and corresponding to the * lower triangular part of the respective dense matrix * @param[in] ij index of the requested element * @return reference to the corresponding matrix element ******************************************************************************/ template inline T& NRSMat::operator[](const size_t ij) { #ifdef DEBUG if(_LA_count_check && *count != 1) laerror("T& NRSMat::operator[] used for matrix with count>1"); if(ij<0 || ij>=NN2) laerror("T& NRSMat::operator[] out of range"); if(!v) laerror("T& NRSMat::operator[] used for unallocated NRSmat object"); #endif NOT_GPU(*this); return v[ij]; } /***************************************************************************//** * determine matrix element of this symmetric matrix of general type T * using cumulative index increasing in a row-major way and corresponding to the * lower triangular part of the respective dense matrix, i.e. \f$A_{i,j}\f$ for * \f$N>i\geq{}j\geq0\f$ corresponds to cumulative index \f$i(i+1)/2+j\f$ * @param[in] ij index of the requested element * @return constant reference to the corresponding matrix element ******************************************************************************/ template inline const T & NRSMat::operator[](const size_t ij) const { #ifdef DEBUG if(ij<0 || ij>=NN2) laerror("T& NRSMat::operator[] out of range"); if(!v) laerror("T& NRSMat::operator[] used for unallocated NRSmat object"); #endif NOT_GPU(*this); return v[ij]; } /***************************************************************************//** * determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$ * where \f$0\leq{}i,j inline size_t SMat_index(T i, T j) { return (i>=j) ? i*(size_t)(i+1)/2+j : j*(size_t)(j+1)/2+i; } /***************************************************************************//** * determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$ * where \f$0\leq{}i,j inline size_t SMat_index_igej(T i, T j) { return i*(size_t)(i+1)/2+j; } /***************************************************************************//** * determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$ * where \f$0\leq{}i,j inline size_t SMat_index_ilej(T i, T j) { return j*(size_t)(j+1)/2+i; } /***************************************************************************//** * determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$ * where \f$1\leq{}i,j\leq{}N\f$ * @param[in] i row index * @param[in] i column index * @return cumulative index ******************************************************************************/ template inline size_t SMat_index_1(T i, T j) { return (i>=j)? i*(size_t)(i-1)/2+j-1 : j*(size_t)(j-1)/2+i-1; } /***************************************************************************//** * determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$ * where \f$1\leq{}i,j\leq{}N\f$ for special case \f$i\geq{}j\f$ * @param[in] i row index * @param[in] i column index * @return cumulative index ******************************************************************************/ template inline size_t SMat_index_1igej(T i, T j) { return i*(size_t)(i-1)/2+j-1; } /***************************************************************************//** * determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$ * where \f$1\leq{}i,j\leq{}N\f$ for special case \f$i\leq{}j\f$ * @param[in] i row index * @param[in] i column index * @return cumulative index ******************************************************************************/ template inline size_t SMat_index_1ilej(T i, T j) { return j*(size_t)(j-1)/2+i-1; } //indexing for antisymmetric matrix (used by fourindex) template inline size_t ASMat_index(T i, T j) { if(i == j) return -1; return (i>j) ? i*(size_t)(i-1)/2+j : j*(size_t)(j-1)/2+i; } template inline size_t ASMat_index_1(T i, T j) { if(i == j) return -1; return (i>j)? (i-2)*(i-1)/2+j-1 : (j-2)*(j-1)/2+i-1; } /***************************************************************************//** * determine matrix element of this symmetric matrix of general type T * @param[in] i row index running from 0 * @param[in] j column index running from 0 * @return reference to the corresponding matrix element * @see count, SMat_index, NRSMat::operator[] ******************************************************************************/ template inline T & NRSMat::operator()(const int i, const int j) { #ifdef DEBUG if(_LA_count_check && *count != 1) laerror("T & NRSMat::operator()(const int, const int) used for matrix with count > 1"); if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat::operator()(const int, const int) out of range"); if(!v) laerror("T & NRSMat::operator()(const int, const int) used for unallocated NRSmat object"); #endif NOT_GPU(*this); return v[SMat_index(i,j)]; } /***************************************************************************//** * determine matrix element of this symmetric matrix of general type T * @param[in] i row index running from 0 * @param[in] j column index running from 0 * @return constant reference to the corresponding matrix element * @see count, SMat_index, NRSMat::operator[] ******************************************************************************/ template inline const T & NRSMat::operator()(const int i, const int j) const { #ifdef DEBUG if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat::operator()(const int, const int) out of range"); if(!v) laerror("T & NRSMat::operator()(const int, const int) used for unallocated NRSmat object"); #endif NOT_GPU(*this); return v[SMat_index(i,j)]; } /***************************************************************************//** * @return number of rows of this symmetric matrix of generalt type T ******************************************************************************/ template inline int NRSMat::nrows() const { return nn; } /***************************************************************************//** * @return number of columns of this symmetric matrix of generalt type T ******************************************************************************/ template inline int NRSMat::ncols() const { return nn; } /***************************************************************************//** * @return number of elements of this symmetric matrix of generalt type T ******************************************************************************/ template inline size_t NRSMat::size() const { return NN2; } /***************************************************************************//** * for this real symmetric matrix \f$A\f$, determine the * first element with largest absolute value * @return \f$A_{l,m}\f$ which maximizes \f$\left|A_{i,j}\right|\f$ ******************************************************************************/ template<> inline const double NRSMat::amax() const { double ret(0.0); #ifdef CUDALA if(location == cpu){ #endif ret = v[cblas_idamax(NN2, v, 1) - 1]; #ifdef CUDALA }else{ const int pozice = cublasIdamax(NN2, v, 1) - 1; TEST_CUBLAS("cublasIdamax");//"double NRSMat::amax()" gpuget(1, sizeof(double), v + pozice, &ret); } #endif return ret; } /***************************************************************************//** * for this real symmetric matrix \f$A\f$, determine the * first element with smallest absolute value * @return \f$A_{l,m}\f$ which minimizes \f$\left|A_{i,j}\right|\f$ ******************************************************************************/ template<> inline const double NRSMat::amin() const { double ret(0.0); #ifdef CUDALA if(location == cpu){ #endif // idamin seems not to be supported double val(0.0); int index(-1); ret = std::numeric_limits::max(); for(register size_t i = 0; i < NN2; i++){ val = std::abs(v[i]); if(val < ret){ index = i; ret = val; } } ret = v[index]; #ifdef CUDALA }else{ const int pozice = cublasIdamin(nn, v, 1) - 1; TEST_CUBLAS("cublasIdamin");//"double NRSMat::amin()" gpuget(1, sizeof(double), v + pozice, &ret); } #endif return ret; } /***************************************************************************//** * for this complex symmetric matrix \f$A\f$, determine the * first element with largest "absolute value" * @return \f$A_{l,m}\f$ which maximizes \f$\left|\Re{}A_{i,j}\right| + \left|\Im{}A_{i,j}\right|\f$ ******************************************************************************/ template<> inline const std::complex NRSMat< std::complex >::amax() const{ std::complex ret(0., 0.); #ifdef CUDALA if(location == cpu){ #endif ret = v[cblas_izamax(NN2, v, 1) - 1]; #ifdef CUDALA }else{ const int pozice = cublasIzamax(NN2, (cuDoubleComplex*)v, 1) - 1; TEST_CUBLAS("cublasIzamax");//"std::complex NRSMat >::amax()" gpuget(1, sizeof(std::complex), v + pozice, &ret); } #endif return ret; } /***************************************************************************//** * for this complex symmetric matrix \f$A\f$, determine the * first element with smallest "absolute value" * @return \f$A_{l,m}\f$ which minimizes \f$\left|\Re{}A_{i,j}\right| + \left|\Im{}A_{i,j}\right|\f$ ******************************************************************************/ template<> inline const std::complex NRSMat >::amin() const{ std::complex ret(0., 0.); #ifdef CUDALA if(location == cpu){ #endif // izamin seems not to be supported int index(0); double val(0.0), min_val(0.0); std::complex z_val(0.0, 0.0); min_val = std::numeric_limits::max(); for(register size_t i = 0; i < NN2; i++){ z_val = v[i]; val = std::abs(z_val.real()) + std::abs(z_val.imag()); if(val < min_val){ index = i; min_val = val; } } ret = v[index]; #ifdef CUDALA }else{ const int pozice = cublasIzamin(nn, (cuDoubleComplex*)v, 1) - 1; TEST_CUBLAS("cublasIzamin");//"std::complex NRSMat >::amin()" gpuget(1, sizeof(std::complex), v + pozice, &ret); } #endif return ret; } /***************************************************************************//** * @return pointer of general type T to the underlying data structure ******************************************************************************/ template inline NRSMat::operator T*() { #ifdef DEBUG if(!v) laerror("unallocated NRSMat object in NRSMat::operator T*()"); #endif return v; } /***************************************************************************//** * @return constant pointer of general type T to the underlying data structure ******************************************************************************/ template inline NRSMat::operator const T*() const { #ifdef DEBUG if(!v) laerror("unallocated NRSMat object in NRSMat::operator const T*()"); #endif return v; } /***************************************************************************//** * destructor for general type T * @see NRSMat::count, NRSMat::v ******************************************************************************/ template NRSMat::~NRSMat() { if(!count) return; if(--(*count) <= 0) { if(v){ #ifdef CUDALA if(location == cpu){ #endif delete[] v; #ifdef CUDALA }else{ gpufree(v); } #endif } delete count; } } /***************************************************************************//** * assigment operator implementing deep copy of the reference NRSMat object * @see NRSMat::operator=, NRSMat::copyonwrite() ******************************************************************************/ template NRSMat & NRSMat::operator|=(const NRSMat &rhs) { #ifdef DEBUG if(!rhs.v) laerror("unallocated NRSMat object in NRSMat & NRSMat::operator|=(const NRSMat &)"); #endif if(this == &rhs) return *this; *this = rhs; this->copyonwrite(); return *this; } /***************************************************************************//** * assignment operator implementing shallow copy of reference NRSMat object * @see NRSMat::operator|=, NRSMat::copyonwrite() ******************************************************************************/ template NRSMat & NRSMat::operator=(const NRSMat & rhs) { if(this == &rhs) return *this; if(count) if(--(*count) == 0){ #ifdef CUDALA if(location == cpu){ #endif delete [] v; #ifdef CUDALA }else{ gpufree(v); } #endif delete count; } v = rhs.v; nn = rhs.nn; count = rhs.count; #ifdef CUDALA location = rhs.location; #endif if(count) (*count)++; return *this; } /***************************************************************************//** * detach this NRSMat object and create own physical copy of the data * @see NRSMat::operator|=, NRSMat::copyonwrite() ******************************************************************************/ template void NRSMat::copyonwrite(bool detachonly, bool deep) { if(!count) { if(nn) laerror("nonempty smat without reference count encountered"); if(_LA_warn_empty_copyonwrite) std::cout <<"Warning: copyonwrite of empty smat\n"; return; } if(*count == 1 && !LA_traits::is_plaindata() && !detachonly && deep) //type-nested copyonwrite { #ifdef CUDALA if(location != cpu) laerror("nested types not supported on gpu memory"); #endif for(int i=0; i::copyonwrite(v[i]); } if(*count > 1){ (*count)--; count = new int; *count = 1; T *newv; #ifdef CUDALA if(location == cpu) { #endif newv = new T[NN2]; if(!detachonly) { if(LA_traits::is_plaindata()) memcpy(newv, v, NN2*sizeof(T)); else { for(int i=0; i::copyonwrite(newv[i]);} } } #ifdef CUDALA }else{ f(!LA_traits::is_plaindata()) laerror("nested types not supported on gpu memory"); newv = (T *) gpualloc(NN2*sizeof(T)); if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat::copyonwrite()"); if(!detachonly) cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1); TEST_CUBLAS("cublasScopy");//"NRSMat::copyonwrite()" } #endif v = newv; } } /***************************************************************************//** * resize this symmetric matrix of general type T * @param[in] n requested number of rows (columns) ******************************************************************************/ template void NRSMat::resize(const int n) { #ifdef DEBUG if(n < 0) laerror("illegal dimension in NRSMat::resize(const int)"); #endif if(count){ if(n == 0){ if(--(*count) <= 0) { if(v) { #ifdef CUDALA if(location == cpu){ #endif delete[] (v); #ifdef CUDALA }else{ gpufree(v); } #endif } delete count; } count = 0; nn = 0; v = 0; return; } if(*count > 1){ (*count)--; count = 0; v = 0; nn = 0; } } if(!count){ count = new int; *count = 1; nn = n; #ifdef CUDALA if(location == cpu){ #endif v = new T[NN2]; #ifdef CUDALA }else{ v = (T*) gpualloc(NN2*sizeof(T)); } #endif return; } if(n != nn){ nn = n; #ifdef CUDALA if(location == cpu){ #endif delete[] v; v = new T[NN2]; #ifdef CUDALA }else{ gpufree(v); v = (T*) gpualloc(NN2*sizeof(T)); } #endif } } /***************************************************************************//** * perform memory transfers between CPU and GPU memory * @param[in] dest memory destination * @see NRSMat::location, NRSMat::getlocation() ******************************************************************************/ #ifdef CUDALA template void NRSMat::moveto(const GPUID dest) { if(location == dest) return; CPU_GPU(location,dest); location = dest; if(v && !count) laerror("internal inconsistency of reference counting 1"); if(!count) return; if(v && *count == 0) laerror("internal inconsistency of reference counting 2"); if(!v) return; T *vold = v; if(dest == cpu){ //moving from GPU to CPU v = new T[NN2]; gpuget(NN2, sizeof(T), vold, v); if(*count == 1) gpufree(vold); else {--(*count); count = new int(1);} }else{ //moving from CPU to GPU v = (T *) gpualloc(NN2*sizeof(T)); gpuput(NN2, sizeof(T), vold, v); if(*count == 1) delete[] vold; else {--(*count); count = new int(1);} } } #endif /***************************************************************************//** * complexify a given matrix \f$A\f$ of general type T * @param[in] rhs matrix \f$A\f$ intended for this operation * @return matrix \f$B\f$ where \f$\Re B=A\f$ and \f$\Im B = 0\f$ ******************************************************************************/ template NRSMat > complexify(const NRSMat &rhs) { NOT_GPU(rhs); NRSMat > r(rhs.nrows()); for(register int i = 0; i NRSMat > complexify(const NRSMat &rhs) { NRSMat > r(rhs.nrows(), rhs.getlocation()); #ifdef CUDALA if(rhs.getlocation() == cpu){ #endif cblas_dcopy(rhs.size(), &(rhs[0]), 1, (double*)(&(r[0])), 2); #ifdef CUDALA }else{ cublasDcopy(rhs.size(), &(rhs[0]), 1, (double*)(&(r[0])), 2); TEST_CUBLAS("cublasDcopy");//"NRSMat > complexify(const NRSMat &)" } #endif return r; } */ /***************************************************************************//** * output operator * @param[in,out] s output stream * @param[in] x NRSMat matrix to be printed out * @return modified stream ******************************************************************************/ template std::ostream& operator<<(std::ostream &s, const NRSMat &x) { #ifdef CUDALA if(x.getlocation() == cpu){ #endif int i,j,n; n = x.nrows(); s << n << ' ' << n << '\n'; for(i = 0;i::IOtype)x(i,j) << (j == n-1 ? '\n' : ' '); } return s; #ifdef CUDALA }else{ NRSMat tmp = x; tmp.moveto(cpu); return s< matrix for storing the input * @return modified stream ******************************************************************************/ template std::istream& operator>>(std::istream &s, NRSMat &x) { #ifdef CUDALA if(x.getlocation() == cpu){ #endif int i,j,n,m; s >> n >> m; if(n!=m) laerror("input symmetric matrix not square"); x.resize(n); typename LA_traits_io::IOtype tmp; for(i = 0;i>tmp; x(i,j)=tmp;} return s; #ifdef CUDALA }else{ NRSMat tmp; tmp.moveto(cpu); s >> tmp; tmp.moveto(x.getlocation()); x = tmp; return s; } #endif } //convert whole matrix between types template void NRSMat_convert(NRSMat &a, const NRSMat &b) { a.resize(b.nrows(),b.ncols()); for(int i=0; i objects and scalars * corresponding macro is defined in vec.h ******************************************************************************/ NRVECMAT_OPER(SMat,+) NRVECMAT_OPER(SMat,-) NRVECMAT_OPER(SMat,*) /***************************************************************************//** * generate operators relating in general two NRSMat objects * corresponding macro is defined in vec.h ******************************************************************************/ NRVECMAT_OPER2(SMat,+) NRVECMAT_OPER2(SMat,-) /***************************************************************************//** * class implementing NRSMat funcitonality with indices running from 1 * allmost all function members are inherited, only constructors are given explicitly ******************************************************************************/ template class NRSMat_from1 : public NRSMat { public: NRSMat_from1(): NRSMat() {}; explicit NRSMat_from1(const int n): NRSMat(n) {}; NRSMat_from1(const NRSMat &rhs): NRSMat(rhs) {}; //be able to convert the parent class transparently to this NRSMat_from1(const T &a, const int n): NRSMat(a,n) {}; NRSMat_from1(const T *a, const int n): NRSMat(a,n) {}; explicit NRSMat_from1(const NRMat &rhs): NRSMat(rhs) {}; explicit NRSMat_from1(const NRVec &rhs, const int n): NRSMat(rhs,n) {}; inline const T& operator() (const int i, const int j) const { #ifdef DEBUG if(i<=0||j<=0||i>NRSMat::nn||j>NRSMat::nn) laerror("index in const T& NRSMat::operator() (const int, const int) out of range"); #endif return NRSMat::v[SMat_index_1(i,j)]; } inline T& operator() (const int i, const int j){ #ifdef DEBUG if(i<=0||j<=0||i>NRSMat::nn||j>NRSMat::nn) laerror("index in T& NRSMat::operator() (const int, const int) out of range"); #endif return NRSMat::v[SMat_index_1(i,j)]; } }; }//namespace #endif /* _LA_SMAT_H_ */