/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */ /******************************************************************************* LA: linear algebra C++ interface library Copyright (C) 2008 Jiri Pittner or complex versions written by Roman Curik This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . *******************************************************************************/ #ifndef _LA_MAT_H_ #define _LA_MAT_H_ #include "la_traits.h" using namespace LA_Vecmat3; #include "vecmat3.h" namespace LA { //forward declaration template class NRPerm; template class WeightPermutation; template class PermutationAlgebra; template class CyclePerm; template class NRMat_from1; /***************************************************************************//** * \brief NRMat class template implementing the matrix interface * @see NRVec, NRSMat ******************************************************************************/ template class NRMat{ protected: int nn;//!< number of rows int mm;//!< number of columns #ifdef MATPTR T **v;//!< pointer to the array of pointers pointing to the beginings of individual rows #else T *v;//!< pointer to the data stored continuously in emmory #endif int *count;//!< reference counter public: #ifdef CUDALA GPUID location; #endif friend class NRVec; friend class NRSMat; friend class NRMat_from1; friend class NRSMat_from1; //! standard destructor ~NRMat(); /***************************************************************************//** * \brief inlined constructor creating zero matrix of general type T ******************************************************************************/ inline NRMat() : nn(0), mm(0), v(0), count(0){ #ifdef CUDALA location = DEFAULT_LOC; #endif }; /***************************************************************************//** * \brief Inlined constructor creating matrix of given size and location. * Because of performance reasons, no incialization is done. * @param[in] n vector size (count of elements) * @param[in] loc location of the underlying data (CPU/GPU) ******************************************************************************/ inline NRMat(const int n, const int m, const GPUID loc = undefined); //! inlined constructor creating matrix of given size filled with prescribed value and stored at given location inline NRMat(const T &a, const int n, const int m, const GPUID loc); //! inlined constructor creating matrix of given size filled with prescribed value inline NRMat(const T &a, const int n, const int m); //! inlined constructor creating matrix of given size filled with data located at given memory location inline NRMat(const T *a, const int n, const int m); inline NRMat(const Mat3 &rhs) : NRMat(&rhs(0,0),3,3) {}; //! inlined constructor creating matrix of given size from an array template inline NRMat(const T (&a)[R][C]); //! inlined copy-constructor inline NRMat(const NRMat &rhs); //! complexifying constructor NRMat(const typename LA_traits_complex::NRMat_Noncomplex_type &rhs, bool imagpart = false); //! explicit decomplexifying constructor explicit NRMat(const NRMat > &rhs, bool imagpart = false); //! explicit constructor converting symmetric matrix stored in packed form into a NRMat object explicit NRMat(const NRSMat &rhs); //! explicit constructor converting vector into a NRMat object #ifdef MATPTR explicit NRMat(const NRVec &rhs, const int n, const int m, const int offset = 0):NRMat(&rhs[0][0] + offset , n, m){ if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length"); }; #else explicit NRMat(const NRVec &rhs, const int n, const int m, const int offset = 0); #endif #ifdef MATPTR const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits::gencmp(v[0],rhs.v[0],(size_t)nn*mm);} //memcmp for scalars else elementwise #else const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits::gencmp(v,rhs.v,(size_t)nn*mm);} //memcmp for scalars else elementwise #endif const bool operator==(const NRMat &rhs) const {return !(*this != rhs);}; //! determine the count of references to this object inline int getcount() const {return count?*count:0;} //! ensure that the data of this matrix are referenced exactly once void copyonwrite(bool detachonly=false, bool deep=true); //! permute matrix elements const NRMat permuted_rows(const NRPerm &p, const bool inverse=false) const; const NRMat permuted_cols(const NRPerm &p, const bool inverse=false) const; const NRMat permuted_both(const NRPerm &p, const NRPerm &q, const bool inverse=false) const; void permuteme_rows(const CyclePerm &p); //in place void permuteme_cols(const CyclePerm &p); //in place void scale_row(const int i, const T f); //in place void scale_col(const int i, const T f); //in place void axpy(const T alpha, const NRPerm &p, const bool direction); explicit NRMat(const NRPerm &p, const bool direction, const bool parity=false); //permutation matrix explicit NRMat(const WeightPermutation &p, const bool direction); explicit NRMat(const PermutationAlgebra &p, const bool direction, const int nforce=0); //note that one cannot represent e.g. young projectors in this way, since the representation of S(n) by permutation matrices is reducible just to two irreps [n] and [n-1,1] since the charater of that RR = number of cycles of length=1 /***************************************************************************//** * routines for CUDA related stuff * \li getlocation() gets the protected data member location * \li moveto(const GPUID) moves underlying data between CPU/GPU memory ******************************************************************************/ #ifdef CUDALA inline GPUID getlocation() const {return location;} void moveto(const GPUID dest); #else inline GPUID getlocation() const {return cpu;} void moveto(const GPUID dest) {}; #endif //! fill the matrix with pseudorandom numbers (uniform distribution) void randomize(const typename LA_traits::normtype &x); //! assigment operator performing shallow copy NRMat & operator=(const NRMat &rhs); //! assigment operator performing deep copy NRMat & operator|=(const NRMat &rhs); //! assign scalar value to the diagonal elements NRMat & operator=(const T &a); //! unit matrix for general power void identity() {*this = (T)1;} //! inverse matrix NRMat inverse(); //! add scalar value to the diagonal elements NRMat & operator+=(const T &a); //! subtract scalar value to the diagonal elements NRMat & operator-=(const T &a); //! multiply by a scalar value NRMat & operator*=(const T &a); //! add given matrix NRMat & operator+=(const NRMat &rhs); //! subtract given matrix NRMat & operator-=(const NRMat &rhs); //! Hadamard element-wise product NRMat & operator^=(const NRMat &rhs); //! add symmetric matrix stored in packed form NRMat & operator+=(const NRSMat &rhs); //! subtract symmetric matrix stored in packed form NRMat & operator-=(const NRSMat &rhs); //! unary minus const NRMat operator-() const; //! add scalar value to all matrix elements and return the result by value inline const NRMat operator+(const T &a) const; //! subtract scalar value from all matrix elements and return the result by value inline const NRMat operator-(const T &a) const; //! multiply all matrix elements by a scalar value and return the result by value inline const NRMat operator*(const T &a) const; //! add given matrix and return the result by value inline const NRMat operator+(const NRMat &rhs) const; //! add given symmetric matrix stored in packed form and return the result by value inline const NRMat operator+(const NRSMat &rhs) const; //! subtract given matrix and return the result by value inline const NRMat operator-(const NRMat &rhs) const; //! subtract given symmetric matrix stored in packed form and return the result by value inline const NRMat operator-(const NRSMat &rhs) const; //! multiply by given matrix and return the result by value const NRMat operator*(const NRMat &rhs) const; //! multiply by given symmetric matrix stored in packed form and return the result by value const NRMat operator*(const NRSMat &rhs) const; //! direct sum of two matrices const NRMat operator&(const NRMat &rhs) const; //! direct product of two matrices const NRMat operator|(const NRMat &rhs) const; //! multiply by a vector const NRVec operator*(const NRVec &rhs) const { NRVec result(nn, rhs.getlocation()); result.gemv((T)0, *this, 'n', (T)1, rhs); return result; }; //! multiply this matrix of general type T by vector of type complex const NRVec > operator*(const NRVec > &rhs) const { NRVec > result(nn, rhs.getlocation()); result.gemv((T)0, *this, 'n', (T)1, rhs); return result; }; //! inner product of two matrices (taking conjugation into account in the complex case) const T dot(const NRMat &rhs) const; //! direct sum const NRMat oplus(const NRMat &rhs) const; //! direct product const NRMat otimes(const NRMat &rhs, bool reversecolumns = false) const; //! multiply by diagonal matrix from left void diagmultl(const NRVec &rhs); //! multiply by diagonal matrix from right void diagmultr(const NRVec &rhs); //! for this matrix \f$A\f$ compute \f$A^\mathrm{T}\cdot{}A\f$ const NRSMat transposedtimes() const; //! for this matrix \f$A\f$ compute \f$A\cdot{}A^\mathrm{T}\f$ const NRSMat timestransposed() const; //! sum numbers in the rows (sum over column index) const NRVec rsum() const; //! sum numbers in the columns (sum over row index) const NRVec csum() const; //! orthonormalize this matrix void orthonormalize(const bool rowcol, const NRSMat *metric = NULL); //! get the ith row const NRVec row(const int i, int l = -1) const; //! get the jth column const NRVec column(const int j, int l = -1) const { NOT_GPU(*this); if(l < 0) l = nn; NRVec r(l); for(register int i=0; i &, const bool divide = 0, bool cache = false) const; //! set diagonal elements void diagonalset(const NRVec &); //! perform the gemv operation with vector of type T void gemv(const T beta, NRVec &r, const char trans, const T alpha, const NRVec &x) const { r.gemv(beta, *this, trans, alpha, x); }; //! perform the gemv operation with vector of type complex void gemv(const T beta, NRVec > &r, const char trans, const T alpha, const NRVec > &x) const { r.gemv(beta, *this, trans, alpha, x); }; //! determine the pointer to the ith row inline T* operator[](const int i); //! determine the const pointer to the ith row inline const T* operator[](const int i) const; //! get the reference to the element with indices (i,j) inline T& operator()(const int i, const int j); //! get the const reference to the element with indices (i,j) inline const T& operator()(const int i, const int j) const; //! get the copy of the element with indices (i,j) inline const T get_ij(const int i, const int j) const; //! get the number of rows inline int nrows() const; //! get the number of columns inline int ncols() const; //! get the number of matrix elements inline size_t size() const; //! unformatted input void get(int fd, bool dimensions = 1, bool transposed = false); //! unformatted output void put(int fd, bool dimensions = 1, bool transposed = false) const; //! formatted output void fprintf(FILE *f, const char *format, const int modulo) const; //! formatted input void fscanf(FILE *f, const char *format); //! set all matrix elements equal to zero void clear(){ if(nn&&mm){ copyonwrite(LA_traits::is_plaindata()); LA_traits::clear((*this)[0], (size_t)nn*mm); } }; //! resize the matrix void resize(int n, int m); //! deallocate the matrix void dealloc(void) {resize(0,0);} //! get the pointer to the data inline operator T*(); //! get the const pointer to the data inline operator const T*() const; //! in case of square matrix, transpose the leading minor of order n NRMat& transposeme(const int n = 0); //! conjugate a square matrix NRMat& conjugateme(); //! transpose this matrix and return the result by value const NRMat transpose(bool conj = false) const; //! conjugate this matrix and return the result by value const NRMat conjugate() const {NRMat r(*this); r.conjugateme(); return r;}; //! extract specified submatrix const NRMat submatrix(const int fromrow, const int torow, const int fromcol, const int tocol) const; const NRMat submatrix(const NRVec &rows, const NRVec &cols) const; //! store given matrix at given position into the current matrix void storesubmatrix(const int fromrow, const int fromcol, const NRMat &rhs); void storesubmatrix(const NRVec &rows, const NRVec &cols, const NRMat &rhs); //! perform the \b gemm operation void gemm(const T &beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T &alpha); //! compute the norm of this matrix const typename LA_traits::normtype norm(const T scalar = (T)0) const; //! add up a scalar multiple of given matrix to the current matrix void axpy(const T alpha, const NRMat &x); //! maximal element in the absolute value inline const T amax() const; //! minimal element in the absolute value inline const T amin() const; //! determine the sum of the diagonal elements const T trace() const; //! swap the order of the rows of the current matrix NRMat & swap_rows(); //! swap the order of the columns of the current matrix NRMat & swap_cols(); //! swap the order of the rows and columns of the current matrix NRMat & swap_rows_cols(); // LV - swapping of columns i and j NRMat & swap_cols(const int i, const int j); // LV - swapping of rows i and j NRMat & swap_rows(const int i, const int j); //rotate rows or columns through an angle NRMat & rotate_cols(const int i, const int j, const T phi); NRMat & rotate_rows(const int i, const int j, const T phi); //! multiply by sparse matrix SparseSMat operator*(const SparseSMat &rhs) const; //! explicit constructor converting sparse matrix into \c NRMat object explicit NRMat(const SparseMat &rhs); // dense from sparse //! explicit constructor converting sparse symmetric matrix into \c NRMat object explicit NRMat(const SparseSMat &rhs); //! explicit constructor converting sparse CSR matrix into \c NRMat object explicit NRMat(const CSRMat &rhs); //! add up given sparse matrix NRMat & operator+=(const SparseMat &rhs); //! subtract given sparse matrix NRMat & operator-=(const SparseMat &rhs); //! perform the \b gemm operation void gemm(const T &beta, const SparseMat &a, const char transa, const NRMat &b, const char transb, const T &alpha); inline void simplify() {}; bool issymmetric() const { return 0; }; const typename LA_traits::normtype nonsymmetry() const; const typename LA_traits::normtype nonhermiticity() const; #ifndef NO_STRASSEN //! Strassen's multiplication (better than \f$\mathacal{O}(n^3)\f$, analogous syntax to \see NRMat::gemm() ) void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha); void s_cutoff(const int,const int,const int,const int) const; #endif void swap(NRMat &rhs) //more efficient swap than via tmp and constructors and operator= { int tmpnn=nn; nn=rhs.nn; rhs.nn=tmpnn; int tmpmm=mm; mm=rhs.mm; rhs.mm=tmpmm; #ifdef MATPTR T ** #else T * #endif tmpv=v; v=rhs.v; rhs.v=tmpv; int *tmpcount=count; count=rhs.count; rhs.count=tmpcount; #ifdef CUDALA GPUID tmplocation=location; location=rhs.location; rhs.location=tmplocation; #endif } NRMat::normtype> abs() const; //elementwise absolute values NRMat::normtype> diffabs(const NRMat &rhs) const; //difference of absolute values }; }//namespace //due to mutual includes this has to be after full class declaration #include "vec.h" #include "smat.h" #include "sparsemat.h" #include "sparsesmat.h" #include "permutation.h" namespace LA { /***************************************************************************//** * matrix constructor * @param[in] n number of rows of the matrix being created * @param[in] m number of cols of the matrix being created * @param[in] loc location for storing the matrix * @see count, v, location ******************************************************************************/ template NRMat::NRMat(const int n, const int m, const GPUID loc) : nn(n), mm(m), count(new int) { T* p; *count = 1; const size_t nm = (size_t)n*m; #ifdef CUDALA location = (loc==undefined?DEFAULT_LOC:loc); if(location == cpu) { #endif #ifdef MATPTR v = new T*[n]; p = v[0] = new T[nm]; for (int i=1; i NRMat::NRMat(const T &a, const int n, const int m, const GPUID loc) : nn(n), mm(m), count(new int) { const size_t nm = (size_t)n*m; T *p; *count = 1; #ifdef CUDALA location = (loc==undefined?DEFAULT_LOC:loc); if(location==cpu){ #endif #ifdef MATPTR v = new T*[n]; p = v[0] = new T[nm]; for (register int i=1; i::is_plaindata() || a != (T)0){ for (register int i=0; i::is_plaindata()) laerror("only implemented for plain data"); smart_gpu_set(nm, a, v); } #endif } /***************************************************************************//** * inline constructor creating matrix from an array ******************************************************************************/ template template inline NRMat::NRMat(const T (&a)[R][C]) : count(new int) { nn = R; mm = C; *count = 1; #ifdef CUDALA if(location==cpu){ #endif #ifdef MATPTR v = new T*[nn]; v[0] = new T[nn*mm]; for (register int i=1; i::is_plaindata()) memcpy(v[0], a, nn*mm*sizeof(T)); else for( int i=0; i::is_plaindata()) memcpy(v, a, nn*mm*sizeof(T)); else for( int i=0; i::is_plaindata()) laerror("only implemented for plain data"); v = (T*) gpualloc(nn*mm*sizeof(T)); cublasSetVector(nm, sizeof(T), a, 1, v, 1); } #endif } /***************************************************************************//** * matrix constructor * @param[in] a value of type T intended for matrix inicialization * @param[in] n number of rows of the matrix being created * @param[in] m number of cols of the matrix being created * @see count, v ******************************************************************************/ template NRMat::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new int) { const size_t nm = (size_t)n*m; T *p; *count = 1; #ifdef CUDALA location = DEFAULT_LOC; if(location==cpu){ #endif #ifdef MATPTR v = new T*[n]; p = v[0] = new T[nm]; for (register int i=1; i::is_plaindata() || a != (T)0) for (register int i=0; i::is_plaindata()) laerror("only implemented for plain data"); smart_gpu_set(nm, a, v); } #endif } /***************************************************************************//** * matrix constructor * @param[in] a pointer to values of type T intended for matrix inicialization * @param[in] n number of rows of the matrix being created * @param[in] m number of cols of the matrix being created * @see count, v ******************************************************************************/ template NRMat::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new int) { const size_t nm = (size_t)n*m; #ifdef CUDALA location = DEFAULT_LOC; #endif *count = 1; #ifdef CUDALA if(location==cpu){ #endif #ifdef MATPTR v = new T*[n]; v[0] = new T[nm]; for (register int i=1; i::is_plaindata()) memcpy(v[0], a, nm*sizeof(T)); else for(int i=0; i::is_plaindata()) memcpy(v, a, nm*sizeof(T)); else for(int i=0; i::is_plaindata()) laerror("only implemented for plain data"); cublasSetVector(nm, sizeof(T), a, 1, v, 1); } #endif } /***************************************************************************//** * copy constructor implementing shallow copy * @param[in] rhs reference object to be copied * @see count, v ******************************************************************************/ template NRMat::NRMat(const NRMat &rhs) { #ifdef CUDALA location = rhs.location; #endif nn = rhs.nn; mm = rhs.mm; count = rhs.count; v = rhs.v; if (count) ++(*count); } /***************************************************************************//** * create matrix from a \c NRSMat object * @param[in] rhs \c NRSMat input object to be converted * @see count, v, vec.h, NRSMat ******************************************************************************/ template NRMat::NRMat(const NRSMat &rhs) { NOT_GPU(rhs); #ifdef CUDALA location = rhs.location; #endif int i(0), j(0), k(0); nn = mm = rhs.nrows(); count = new int; *count = 1; #ifdef MATPTR v = new T*[nn]; v[0] = new T[(size_t)mm*nn]; for (int i=1; i NRMat::NRMat(const NRVec &rhs, const int n, const int m, const int offset) { if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length"); if(offset!=0) std::cout << "dangerous: if the underlying vector is deallocated before the matrix, wrong delete[] will result for nonzero offset!!!\n"; #ifdef CUDALA location=rhs.location; #endif nn = n; mm = m; count = rhs.count; v = rhs.v + offset;//!< make just shallow copy (*count)++;//!< therefore increase the reference counter } #endif /***************************************************************************//** * \c NRMat + \c NRSmat via operator += * @param[in] rhs NRSMat matrix to be subtracted from current matrix * @return result of the subtraction * @see NRMat::operator+=(const NRSMat &) ******************************************************************************/ template inline const NRMat NRMat::operator+(const NRSMat &rhs) const { return NRMat(*this) += rhs; } /***************************************************************************//** * \c NRMat - \c NRSmat via operator -= * @param[in] rhs NRSMat matrix to be subtracted from current matrix * @return result of the subtraction * @see NRMat::operator-=(const NRSMat &) ******************************************************************************/ template inline const NRMat NRMat::operator-(const NRSMat &rhs) const { return NRMat(*this) -= rhs; } /***************************************************************************//** * @param[in] i row number * @return pointer to the first element in the i-th row ******************************************************************************/ template inline T* NRMat::operator[](const int i) { #ifdef DEBUG if (_LA_count_check && *count != 1) laerror("matrix with *count>1 used as l-value"); if (i < 0 || i >= nn) laerror("Mat [] out of range"); if (!v) laerror("unallocated matrix"); #endif #ifdef MATPTR return v[i]; #else return v + i*(size_t)mm; #endif } /***************************************************************************//** * @param[in] i row number * @return const pointer to the first element in the i-th row ******************************************************************************/ template inline const T* NRMat::operator[](const int i) const { #ifdef DEBUG if (i < 0 || i >= nn) laerror("index out of range"); if (!v) laerror("unallocated matrix"); #endif NOT_GPU(*this); #ifdef MATPTR return v[i]; #else return v + i*(size_t)mm; #endif } /***************************************************************************//** * for a given matrix \f$A\f$, determine the element with indices (i,j) * @param[in] i row number * @param[in] j col number * @return reference to \f$A_{i,j}\f$ * @see NRMat::count ******************************************************************************/ template inline T& NRMat::operator()(const int i, const int j){ #ifdef DEBUG if (_LA_count_check && *count != 1) laerror("NRMat::operator(,) used as l-value for a matrix with count > 1"); if (i < 0 || i >= nn && nn > 0 || j < 0 || j >= mm && mm > 0) laerror("index out of range"); if (!v) laerror("unallocated matrix"); #endif NOT_GPU(*this); #ifdef MATPTR return v[i][j]; #else return v[i*(size_t)mm + j]; #endif } /***************************************************************************//** * for a given matrix \f$A\f$, determine the element with indices (i,j) * @param[in] i row number * @param[in] j col number * @return const reference to \f$A_{i,j}\f$ ******************************************************************************/ template inline const T& NRMat::operator()(const int i, const int j) const{ T ret; #ifdef DEBUG if (i<0 || i>=nn && nn>0 || j<0 || j>=mm && mm>0) laerror("index out of range"); if (!v) laerror("unallocated matrix"); #endif NOT_GPU(*this); #ifdef MATPTR return v[i][j]; #else return v[i*(size_t)mm + j]; #endif } /***************************************************************************//** * for a given matrix \f$A\f$, determine the element with indices (i,j) * @param[in] i row number * @param[in] j col number * @return const reference to \f$A_{i,j}\f$ ******************************************************************************/ template inline const T NRMat::get_ij(const int i, const int j) const{ T ret; #ifdef DEBUG if (i<0 || i>=nn || j<0 || j>=mm) laerror("index out of range"); if (!v) laerror("unallocated matrix"); #endif #ifdef CUDALA if(location == cpu){ #endif #ifdef MATPTR return v[i][j]; #else return v[i*(size_t)mm + j]; #endif #ifdef CUDALA }else{ const size_t pozice = i*(size_t)mm + j; gpuget(1, sizeof(T), v + pozice, &ret); return ret; } #endif } /***************************************************************************//** * @return number of rows ******************************************************************************/ template inline int NRMat::nrows() const{ return nn; } /***************************************************************************//** * @return number of columns ******************************************************************************/ template inline int NRMat::ncols() const{ return mm; } /***************************************************************************//** * @return number of elements ******************************************************************************/ template inline size_t NRMat::size() const{ return (size_t)nn*mm; } /***************************************************************************//** * @return pointer of general type T to the underlying data structure ******************************************************************************/ template inline NRMat::operator T*(){ #ifdef DEBUG if (!v) laerror("unallocated matrix"); #endif #ifdef MATPTR return v[0]; #else return v; #endif } /***************************************************************************//** * @return const pointer of general type T to the underlying data ******************************************************************************/ template inline NRMat::operator const T*() const{ #ifdef DEBUG if (!v) laerror("unallocated matrix"); #endif #ifdef MATPTR return v[0]; #else return v; #endif } /***************************************************************************//** * for this real matrix \f$A\f$, determine the first element * with largest absolute value * @return \f$A_{l,m}\f$ which maximizes \f$\left|A_{i,j}\right|\f$ ******************************************************************************/ template<> inline const double NRMat::amax() const{ #ifdef CUDALA if(location == cpu){ #endif #ifdef MATPTR return v[0][cblas_idamax(nn*mm, v[0], 1) - 1]; #else return v[cblas_idamax(nn*mm, v, 1) - 1]; #endif #ifdef CUDALA }else{ double ret(0.0); const size_t pozice = cublasIdamax((size_t)nn*mm, v, 1) - 1; TEST_CUBLAS("cublasIdamax"); gpuget(1, sizeof(double), v + pozice, &ret); return ret; } #endif } /***************************************************************************//** * for this real matrix \f$A\f$, determine the first element * with smallest absolute value * @return \f$A_{l,m}\f$ which minimizes \f$\left|A_{i,j}\right|\f$ ******************************************************************************/ template<> inline const double NRMat::amin() const{ double ret(0.0); #ifdef CUDALA if(location == cpu){ #endif // idamin seems not to be supported const size_t nm = (size_t)nn*mm; double val(0.0); int index(-1); ret = std::numeric_limits::max(); for(register int i=0; i < nm; i++){ #ifdef MATPTR val = std::abs(v[0][i]); #else val = std::abs(v[i]); #endif if(val < ret){ index = i; ret = val; } } #ifdef MATPTR ret = v[0][index]; #else ret = v[index]; #endif #ifdef CUDALA }else{ const size_t pozice = cublasIdamin((size_t)nn*mm, v, 1) - 1; TEST_CUBLAS("cublasIdamin"); gpuget(1, sizeof(double), v + pozice, &ret); } #endif return ret; } /***************************************************************************//** * for this complex matrix \f$A\f$, determine the smallest index of the maximum * magnitude element, i.e. maximal element in the 1-norm * @return \f$A_{l,m}\f$ which maximizes \f$\left\{\left|\Re{}A_{i,j}\right|+\left|\Im{}A_{i,j}\right|\right}\f$ ******************************************************************************/ template<> inline const std::complex NRMat >::amax() const{ #ifdef CUDALA if(location == cpu){ #endif #ifdef MATPTR return v[0][cblas_izamax(nn*mm, v[0], 1) - 1]; #else return v[cblas_izamax(nn*mm, v, 1) - 1]; #endif #ifdef CUDALA }else{ std::complex ret(0.0, 0.0); const size_t pozice = cublasIzamax((size_t)nn*mm, (cuDoubleComplex*)v, 1) - 1; TEST_CUBLAS("cublasIzamax"); gpuget(1, sizeof(std::complex), v + pozice, &ret); return ret; } #endif } /***************************************************************************//** * for this complex matrix \f$A\f$, determine the smallest index of the minimum * magnitude element, i.e. minimal element in the 1-norm * @return \f$A_{l,m}\f$ which minimizes \f$\left\{\left|\Re{}A_{i,j}\right|+\left|\Im{}A_{i,j}\right|\right}\f$ ******************************************************************************/ template<> inline const std::complex NRMat >::amin() const{ std::complex ret(0.0, 0.0); #ifdef CUDALA if(location == cpu){ #endif // idamin seems not to be supported const size_t nm = (size_t)nn*mm; int index(-1); double val(0.0), min_val(0.0); std::complex z_val(0.0, 0.0); min_val = std::numeric_limits::max(); for(register int i=0; i < nm; i++){ #ifdef MATPTR z_val = v[0][i]; #else z_val = v[i]; #endif val = std::abs(z_val.real()) + std::abs(z_val.imag()); if(val < min_val){ index = i; min_val = val; } } #ifdef MATPTR ret = v[0][index]; #else ret = v[index]; #endif #ifdef CUDALA }else{ const size_t pozice = cublasIzamin((size_t)nn*mm, (cuDoubleComplex*)v, 1) - 1; TEST_CUBLAS("cublasIzamin"); gpuget(1, sizeof(std::complex), v + pozice, &ret); } #endif return ret; } /***************************************************************************//** * destructor for general type * @see NRMat::count ******************************************************************************/ template NRMat::~NRMat() { if(!count) return; if(--(*count) <= 0) { if (v){ #ifdef CUDALA if(location == cpu){ #endif #ifdef MATPTR delete[] (v[0]); #endif delete[] v; #ifdef CUDALA }else{ gpufree(v); } #endif } delete count; } } /***************************************************************************//** * assigment operator for general type between NRMat and NRMat * @see count * @return reference to the newly assigned matrix ******************************************************************************/ template NRMat & NRMat::operator=(const NRMat &rhs) { if (this != &rhs){ if (count){ if (--(*count) ==0 ){ #ifdef CUDALA if(location == cpu){ #endif #ifdef MATPTR delete[] (v[0]); #endif delete[] v; #ifdef CUDALA }else{ gpufree(v); } #endif delete count; } } v = rhs.v; #ifdef CUDALA location = rhs.location; #endif nn = rhs.nn; mm = rhs.mm; count = rhs.count; if(count) (*count)++; } return *this; } /***************************************************************************//** * perform an explicit deep copy of \c NRMat object * @see count * @return reference to the newly copied matrix ******************************************************************************/ template NRMat & NRMat::operator|=(const NRMat &rhs) { if(this == &rhs) return *this; // test to avoid self-assignment *this = rhs; this->copyonwrite(); return *this; } /***************************************************************************//** * create own deep copy * @see NRMat::count, NRMat::operator|=() ******************************************************************************/ template void NRMat::copyonwrite(bool detachonly, bool deep) { if(!count) { if(nn||mm) laerror("nonempty matrix without reference count encountered"); if(_LA_warn_empty_copyonwrite) std::cout <<"Warning: copyonwrite of empty matrix\n"; return; } if(*count == 1 && !LA_traits::is_plaindata() && !detachonly &&deep) //type-nested copyonwrite { #ifdef CUDALA if(location != cpu) laerror("nested types not supported on gpu memory"); #endif for(int i=0; i::copyonwrite(v[i]); } if(*count > 1){ (*count)--; count = new int; *count = 1; #ifdef CUDALA if(location == cpu){ //matrix is in CPU memory #endif #ifdef MATPTR T **newv = new T*[nn]; newv[0] = new T[(size_t)mm*nn]; if(!detachonly) { if(LA_traits::is_plaindata()) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T)); else { for(int i=0; i::copyonwrite(newv[i]);} } } v = newv; for(register int i=1; i::is_plaindata()) memcpy(newv, v, (size_t)mm*nn*sizeof(T)); else { for(int i=0; i::copyonwrite(newv[i]);} } } v = newv; #endif #ifdef CUDALA }else{ //matrix is in GPU memory if(!LA_traits::is_plaindata()) laerror("nested types not supported on gpu memory"); T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T)); if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem"); if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1); TEST_CUBLAS("cublasScopy"); v = newv; } #endif } } /***************************************************************************//** * resize given matrix * @param[in] n number of rows * @param[in] m number of cols * @see count, NRMat::copyonwrite(), NRMat::operator|=() * @return reference to the newly copied matrix ******************************************************************************/ //NOTE it must not be used in constructors when the data were not initialized yet template void NRMat::resize(int n, int m) { #ifdef DEBUG if (n<0 || m<0) laerror("illegal dimensions"); #endif //allow trivial dimensions if(n == 0 || m == 0) m = n =0; if(count){ if(n==0 && m==0){ if(--(*count) <= 0){ #ifdef CUDALA if(location==cpu){ #endif #ifdef MATPTR if(v) delete[] (v[0]); #endif if(v) delete[] v; #ifdef CUDALA } else { gpufree(v); } #endif delete count; } count = 0; nn = mm = 0; v = 0; return; } /* if we have more than one reference to this matrix, set count to NULL in order to reach the if-branch below where new memory resources are allocated */ if(*count > 1){ (*count)--; count = 0; nn = mm = 0; v = 0; } } if(!count){ count = new int; *count = 1; nn = n; mm = m; #ifdef CUDALA if(location==cpu){ #endif #ifdef MATPTR v = new T*[nn]; v[0] = new T[(size_t)m*n]; for (register int i=1; i< n; i++) v[i] = v[i-1] + m; #else v = new T[(size_t)m*n]; #endif #ifdef CUDALA }else{ v = (T *) gpualloc((size_t)n*m*sizeof(T)); } #endif return; } // at this point *count = 1, check if resize is necessary if (n != nn || m != mm) { nn = n; mm = m; #ifdef CUDALA if(location==cpu){ #endif #ifdef MATPTR delete[] (v[0]); #endif delete[] v; #ifdef MATPTR v = new T*[nn]; v[0] = new T[(size_t)m*n]; for (int i=1; i< n; i++) v[i] = v[i-1] + m; #else v = new T[(size_t)m*n]; #endif #ifdef CUDALA }else{ gpufree(v); v=(T *) gpualloc((size_t)n*m*sizeof(T)); } #endif } } /***************************************************************************//** * complexify a given matrix \f$A\f$ * @param[in] rhs matrix \f$A\f$ intended for this operation * @return matrix \f$B\f$ where \f$\Re B=A\f$ and \f$\Im B = 0\f$ ******************************************************************************/ template NRMat > complexify(const NRMat &rhs) { NOT_GPU(rhs); NRMat > r(rhs.nrows(), rhs.ncols(), rhs.getlocation()); for(register int i=0; i NRMat > complexify(const NRMat &rhsr, const NRMat &rhsi) { NOT_GPU(rhsr); NOT_GPU(rhsi); if(rhsr.nrows()!=rhsi.nrows() || rhsr.ncols()!=rhsi.ncols()) laerror("inconsistent dimensions in complexify"); NRMat > r(rhsr.nrows(), rhsr.ncols(), rhsr.getlocation()); for(register int i=0; i(rhsr(i,j),rhsi(i,j)); } return r; } /***************************************************************************//** * output operator * @param[in,out] s output stream * @param[in] x NRMat matrix to be prited out * @return modified stream ******************************************************************************/ template std::ostream& operator<<(std::ostream &s, const NRMat &x) { #ifdef CUDALA if(x.getlocation() == cpu){ #endif int i(0),j(0); int n(x.nrows()), m(x.ncols()); s << n << ' ' << m << '\n'; for(i=0; i::IOtype) x[i][j] << (j==m-1 ? '\n' : ' '); } } return s; #ifdef CUDALA }else{ NRMat tmp = x; tmp.moveto(cpu); return s << tmp; } #endif } /***************************************************************************//** * input operator * @param[in,out] s input stream * @param[in] x NRMat matrix for storing the input * @return modified stream ******************************************************************************/ template std::istream& operator>>(std::istream &s, NRMat &x) { #ifdef CUDALA if(x.getlocation() == cpu){ #endif int i(0), j(0), n(0), m(0); s >> n >> m; x.resize(n, m); typename LA_traits_io::IOtype tmp; for(i=0;i> tmp; x[i][j] = tmp; } } return s; #ifdef CUDALA }else{ NRMat tmp; tmp.moveto(cpu); s >> tmp; tmp.moveto(x.getlocation()); x = tmp; return s; } #endif } /***************************************************************************//** * implements \c NRMat functionality with indexing from 1 * all possible constructors have to be given explicitly, other stuff is inherited * with exception of the operator() which differs ******************************************************************************/ template class NRMat_from1 : public NRMat { public: NRMat_from1(): NRMat() {}; template explicit NRMat_from1(const T (&a)[R][C]) : NRMat(a) {}; explicit NRMat_from1(const int n): NRMat(n) {}; explicit NRMat_from1(const NRSMat_from1 &rhs) : NRMat(rhs) {}; NRMat_from1(const NRMat &rhs): NRMat(rhs) {};//!< be able to convert the parent class transparently to this NRMat_from1(const int n, const int m): NRMat(n, m) {}; NRMat_from1(const T &a, const int n, const int m): NRMat(a, n, m) {}; NRMat_from1(const T *a, const int n, const int m): NRMat(a, n, m) {}; inline const T& operator() (const int i, const int j) const { #ifdef DEBUG if (i<1 || i>NRMat::nn || j<1 || j>NRMat::mm) laerror("index out of range"); if (!NRMat::v) laerror("unallocated matrix"); #endif NOT_GPU(*this); #ifdef MATPTR return NRMat::v[i - 1][j - 1]; #else return NRMat::v[(i-1)*(size_t)NRMat::mm+j-1]; #endif } inline T& operator() (const int i, const int j) { #ifdef DEBUG if (_LA_count_check && *NRMat::count != 1) laerror("matrix with *count > 1 used as l-value"); if (i<1 || i>NRMat::nn || j<1 || j>NRMat::mm) laerror("index out of range"); if (!NRMat::v) laerror("unallocated matrix"); #endif NOT_GPU(*this); #ifdef MATPTR return NRMat::v[i-1][j-1]; #else return NRMat::v[(i-1)*NRMat::mm+j-1]; #endif } inline const T get_ij(const int i, const int j) const { T ret; #ifdef DEBUG if (i<1 || i>NRMat::nn || j<1 || j>NRMat::mm) laerror("index out of range"); if (!NRMat::v) laerror("unallocated matrix"); #endif #ifdef CUDALA if(NRMat::location == cpu){ #endif #ifdef MATPTR return NRMat::v[i - 1][j - 1]; #else return NRMat::v[(size_t)(i-1)*NRMat::mm + (j-1)]; #endif #ifdef CUDALA }else{ const size_t pozice = (size_t)(i-1)*NRMat::mm + (j-1); gpuget(1, sizeof(T), NRMat::v + pozice, &ret); return ret; } #endif } }; /***************************************************************************//** * compute Hadamard (component-wise) product with a given matrix \f$A\f$ * @param[in] rhs matrix \f$A\f$ * @see count, operator* * @return reference to the multiplied matrix ******************************************************************************/ template NRMat& NRMat::operator^=(const NRMat &rhs){ #ifdef DEBUG if (nn != rhs.nn || mm != rhs.mm) laerror("incompatible matrices"); #endif SAME_LOC(*this, rhs); NOT_GPU(*this); copyonwrite();// ensure that *count == 1 #ifdef MATPTR for (register size_t i=0; i< (size_t)nn*mm; i++) v[0][i] *= rhs.v[0][i]; #else const size_t Dim = (size_t)nn*mm; for(register size_t i=0; i void NRMat::moveto(const GPUID dest) { if(location == dest) return;// no operation is necessary /* currently, only movements between CPU and GPU are implemented CUBLAS seems to lack support for multiple GPUs */ CPU_GPU(location, dest); location = dest; if(v && !count) laerror("internal inconsistency of reference counting 1"); if (!count) return; if(v && *count==0) laerror("internal inconsistency of reference counting 2"); if(!v) return; T *vold = v; if(dest == cpu){ //moving from GPU to CPU v = new T[(size_t)nn*mm]; gpuget((size_t)nn*mm, sizeof(T), vold, v); if(*count == 1){ gpufree(vold); } else{ --(*count); count = new int(1); } }else{ //moving from CPU to GPU v = (T *) gpualloc((size_t)nn*mm*sizeof(T)); gpuput((size_t)nn*mm, sizeof(T), vold, v); if(*count == 1) delete[] vold; else{ --(*count); count = new int(1);} } } #endif /***************************************************************************//** * add a given general matrix (type T) \f$A\f$ to the current complex matrix * @param[in] rhs matrix \f$A\f$ of type T * @return reference to the modified matrix ******************************************************************************/ template NRMat & NRMat::operator+=(const NRMat &rhs) { #ifdef DEBUG if (nn != rhs.nn || mm != rhs.mm) laerror("incompatible matrices"); #endif SAME_LOC(*this, rhs); NOT_GPU(*this); copyonwrite(); #ifdef MATPTR for(size_t i=0; i< (size_t)nn*mm; i++) v[0][i] += rhs.v[0][i]; #else for(size_t i=0; i< (size_t)nn*mm; i++) v[i] += rhs.v[i]; #endif return *this; } /***************************************************************************//** * subtract a given general matrix (type T) \f$A\f$ from the current matrix * @param[in] rhs matrix \f$A\f$ of type T * @return reference to the modified matrix ******************************************************************************/ template NRMat & NRMat::operator-=(const NRMat &rhs) { #ifdef DEBUG if (nn != rhs.nn || mm != rhs.mm) laerror("incompatible matrices"); #endif SAME_LOC(*this, rhs); NOT_GPU(*this); copyonwrite(); #ifdef MATPTR for(size_t i=0; i< (size_t)nn*mm; i++) v[0][i] -= rhs.v[0][i]; #else for(size_t i=0; i<(size_t) nn*mm; i++) v[i] -= rhs.v[i]; #endif return *this; } /*difference of absolute values*/ template NRMat::normtype> NRMat::diffabs(const NRMat &rhs) const { #ifdef DEBUG if (nn != rhs.nn ||mm!=rhs.mm) laerror("incompatible dimensions"); #endif NOT_GPU(*this); NOT_GPU(rhs); NRMat::normtype> r(nn,mm); for(size_t i=0; i< nn; i++) for(size_t j=0; j< mm; j++) r(i,j) = MYABS((*this)(i,j)) - MYABS(rhs(i,j)); return r; } /*elementwise absolute values*/ template NRMat::normtype> NRMat::abs() const { NOT_GPU(*this); NRMat::normtype> r(nn,mm); for(size_t i=0; i< nn; i++) for(size_t j=0; j< mm; j++) r(i,j) = MYABS((*this)(i,j)); return r; } //convert whole matrix between types template void NRMat_convert(NRMat &a, const NRMat &b) { a.resize(b.nrows(),b.ncols()); for(int i=0; i