//------------------------------------------------------------------------------ /* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */ //------------------------------------------------------------------------------ /* LA: linear algebra C++ interface library Copyright (C) 2008 Jiri Pittner or complex versions written by Roman Curik This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "smat.h" #include #include #include #include #include #include #include namespace LA { /***************************************************************************//** * routine for raw output * @param[in] fd file descriptor for output * @param[in] dim number of elements intended for output * @param[in] transp reserved * @see NRMat::get(), NRSMat::copyonwrite() ******************************************************************************/ template void NRSMat::put(int fd, bool dim, bool transp) const { #ifdef CUDALA if(location != cpu){ NRSMat tmp= *this; tmp.moveto(cpu); tmp.put(fd,dim,transp); return; } #endif errno = 0; if(dim){ if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("cannot write"); } LA_traits::multiput((size_t)nn*(nn+1)/2,fd,v,dim); } /***************************************************************************//** * routine for raw input * @param[in] fd file descriptor for input * @param[in] dim number of elements intended for input * @param[in] transp reserved * @see NRSMat::put(), NRSMat::copyonwrite() ******************************************************************************/ template void NRSMat::get(int fd, bool dim, bool transp) { #ifdef CUDALA if(location != cpu){ NRSMat tmp; tmp.moveto(cpu); tmp.get(fd,dim,transp); tmp.moveto(location); *this = tmp; return; } #endif int nn0[2]; //align at least 8-byte errno = 0; if(dim){ if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("cannot read"); resize(nn0[0]); }else{ copyonwrite(); } LA_traits::multiget((size_t)nn*(nn+1)/2,fd,v,dim); } /***************************************************************************//** * constructor symmetrizing given matrix \f$A\f$ of general type T yielding \f$(A+A^\mathrm{T})/2\f$ * @param[in] rhs matrix \f$A\f$ ******************************************************************************/ template NRSMat::NRSMat(const NRMat &rhs) { NOT_GPU(rhs); nn = rhs.nrows(); #ifdef DEBUG if(nn != rhs.ncols()) laerror("attempt to convert nonsquare NRMat to NRSMat"); #endif #ifdef CUDALA location = rhs.getlocation(); #endif count = new int; *count = 1; v = new T[NN2]; int i, j, k(0); for(i=0; iT and then set * the diagonal elements to prescribed value * @param[in] a scalar value to be assigned to the diagonal * @return reference to the modified matrix ******************************************************************************/ template NRSMat & NRSMat::operator=(const T &a) { NOT_GPU(*this); copyonwrite(); memset(v, 0, NN2*sizeof(T)); for(register int i=0; idivide == true NULL * \li divide == false pointer to the first element of r ******************************************************************************/ template const T* NRSMat::diagonalof(NRVec &r, const bool divide, bool cache) const { #ifdef DEBUG if(r.size() != nn) laerror("incompatible vector in const T* NRSMat::diagonalof(NRVec &, const bool, bool)"); #endif NOT_GPU(*this); SAME_LOC(*this, r); r.copyonwrite(); if(divide){ for(register int i=0; iT * @return modified copy of this matrix ******************************************************************************/ template const NRSMat NRSMat::operator-() const { NOT_GPU(*this); NRSMat result(nn, getlocation()); for(register size_t i = 0; i const NRSMat NRSMat::operator-() const { NRSMat result(nn, getlocation()); #ifdef CUDALA if(location == cpu){ #endif memcpy(result.v, v, NN2*sizeof(double)); cblas_dscal(NN2, -1., result.v, 1); #ifdef CUDALA }else{ cublasDcopy(NN2, v, 1, result.v, 1); TEST_CUBLAS("cublasDcopy"); cublasDscal(NN2, -1., result.v, 1); TEST_CUBLAS("cublasDscal"); } #endif return result; } /***************************************************************************//** * implements unary minus operator for this hermitian matrix * @return modified copy of this matrix ******************************************************************************/ template <> const NRSMat > NRSMat >::operator-() const { NRSMat > result(nn, getlocation()); #ifdef CUDALA if(location == cpu) { #endif memcpy(result.v, v, NN2*sizeof(std::complex)); cblas_zscal(NN2, &CMONE, result.v, 1); #ifdef CUDALA }else{ cublasZcopy(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)result.v, 1); TEST_CUBLAS("cublasZcopy"); cublasZscal(NN2, CUMONE, (cuDoubleComplex*)result.v, 1); TEST_CUBLAS("cublasZscal"); } #endif return result; } /***************************************************************************//** * @return the sum of the diagonal elements ******************************************************************************/ template const T NRSMat::trace() const { NOT_GPU(*this); T tmp = 0; for(register int i=0; i void NRSMat::randomize(const double &x) { NOT_GPU(*this); for(size_t i=0; i void NRSMat >::randomize(const double &x) { for(register size_t i=0; iFILE structure representing the output file * @param[in] format format specification in standard printf-like form * @param[in] modulo * @see lawritemat() ******************************************************************************/ template void NRSMat::fprintf(FILE *file, const char *format, const int modulo) const { NOT_GPU(*this); lawritemat(file, (const T *)(*this) ,nn, nn, format, 2, modulo, 1); } /***************************************************************************//** * routine for formatted input via fscanf * @param[in] f pointer to FILE structure representing the input file * @param[in] format format specification in standard printf-like form ******************************************************************************/ template void NRSMat::fscanf(FILE *f, const char *format) { int n, m; NOT_GPU(*this); if (::fscanf(f,"%d %d", &n, &m) != 2) laerror("cannot read matrix dimensions in NRSMat::fscanf(FILE *, const char *)"); if (n != m) laerror("different dimensions in NRSMat::fscanf(FILE *, const char *)"); resize(n); for (int i=0; i::fscanf(FILE *, const char *) - unable to read matrix element"); } //apply permutation template const NRSMat NRSMat::permuted(const NRPerm &p, const bool inverse) const { #ifdef DEBUG if(!p.is_valid()) laerror("invalid permutation of smatrix"); #endif int n=p.size(); if(n!=(*this).nrows()) laerror("incompatible permutation and smatrix"); #ifdef CUDALA if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); #endif NRSMat r(n); if(inverse) { for(int i=1; i<=n; ++i) { int pi = p[i]-1; for(int j=1; j<=i; ++j) { int pj = p[j] - 1; r(i-1,j-1) = (*this)(pi,pj); } } } else { for(int i=1; i<=n; ++i) { int pi = p[i]-1; for(int j=1; j<=i; ++j) { int pj = p[j] - 1; r(pi,pj) = (*this)(i-1,j-1); } } } return r; } /***************************************************************************//** * multiply this real double-precision symmetric matrix \f$S\f$ stored in packed form * with real double-precision dense matrix \f$A\f$ * @param[in] rhs real double-precision matrix \f$A\f$ * @return matrix produt \f$S\times{}A\f$ ******************************************************************************/ template<> const NRMat NRSMat::operator*(const NRMat &rhs) const { #ifdef DEBUG if(nn != rhs.nrows()) laerror("incompatible dimensions in NRMat NRSMat::operator*(const NRMat &)"); #endif SAME_LOC(*this, rhs); NRMat result(nn, rhs.ncols(), getlocation()); #ifdef CUDALA if(location == cpu){ #endif for(register int k = 0; k const NRMat > NRSMat >::operator*(const NRMat > &rhs) const { #ifdef DEBUG if (nn != rhs.nrows()) laerror("incompatible dimensions in NRSMat >::operator*(const NRMat > &)"); #endif SAME_LOC(*this, rhs); NRMat > result(nn, rhs.ncols(), getlocation()); #ifdef CUDALA if(location == cpu){ #endif for(register int k=0; k const NRMat NRSMat::operator*(const NRSMat &rhs) const { #ifdef DEBUG if (nn != rhs.nn) laerror("incompatible dimensions in NRMat NRSMat::operator*(const NRSMat &)"); #endif NRMat result(0.0, nn, nn); double *p, *q; p = v; for (int i=0; i const NRMat > NRSMat >::operator*(const NRSMat > &rhs) const { #ifdef DEBUG if (nn != rhs.nn) laerror("incompatible dimensions in NRSMat >::operator*(const NRSMat > &)"); #endif SAME_LOC(*this, rhs); NRMat > result(nn, nn, getlocation()); NRMat > rhsmat(rhs); result = *this * rhsmat; return result; } /***************************************************************************//** * compute inner product of this real symmetric matrix \f$A\f$ with given real symmetric matrix \f$B\f$ * i.e. determine the value of * \f[\sum_{i,j}A_{i,j}B_{i,j}\f] * @param[in] rhs matrix \f$B\f$ * @return computed inner product ******************************************************************************/ template<> const double NRSMat::dot(const NRSMat &rhs) const { double ret(0.); #ifdef DEBUG if (nn != rhs.nn) laerror("incompatible dimensions in double NRSMat::dot(const NRSMat &)"); #endif SAME_LOC(*this, rhs); #ifdef CUDALA if(location == cpu){ #endif ret = cblas_ddot(NN2, v, 1, rhs.v, 1); #ifdef CUDALA }else{ ret = cublasDdot(NN2, v, 1, rhs.v, 1); } #endif return ret; } /***************************************************************************//** * compute inner product of this complex symmetric matrix \f$A\f$ with given complex symmetric matrix \f$B\f$ * i.e. determine the value of * \f[\sum_{i,j}\overbar{A_{i,j}}B_{i,j}\f] * @param[in] rhs matrix \f$B\f$ * @return computed inner product ******************************************************************************/ template<> const std::complex NRSMat >::dot(const NRSMat > &rhs) const { #ifdef DEBUG if (nn != rhs.nn) laerror("incompatible dimensions in std::complex NRSMat >::dot(const NRSMat > &)"); #endif std::complex dot(0., 0.); SAME_LOC(*this, rhs); #ifdef CUDALA if(location == cpu){ #endif cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot); #ifdef CUDALA }else{ const cuDoubleComplex _dot = cublasZdotc(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)(rhs.v), 1); dot = std::complex(cuCreal(_dot), cuCimag(_dot)); TEST_CUBLAS("cublasZdotc"); } #endif return dot; } /***************************************************************************//** * compute inner product of this real double-precision symmetric matrix \f$S\f$ of order \f$n\f$ * with given real double-precision vector \f$\vec{v}\f$ of length \f$n(n+1)/2\f$ * @param[in] rhs real double-precision vector \f$\vec{v}\f$ * @return computed inner product ******************************************************************************/ template<> const double NRSMat::dot(const NRVec &rhs) const { double ret(0.0); #ifdef DEBUG if(NN2 != rhs.nn) laerror("incompatible dimensions in double NRSMat::dot(const NRVec &)"); #endif SAME_LOC(*this, rhs); #ifdef CUDALA if(location == cpu){ #endif ret = cblas_ddot(NN2, v, 1, rhs.v, 1); #ifdef CUDALA }else{ ret = cublasDdot(NN2, v, 1, rhs.v, 1); TEST_CUBLAS("cublasDdot"); } #endif return ret; } /***************************************************************************//** * compute inner product of this complex double-precision hermitian matrix \f$H\f$ of order \f$n\f$ * with given complex double-precision vector \f$\vec{v}\f$ of length \f$n(n+1)/2\f$ * @param[in] rhs complex double-precision vector \f$\vec{v}\f$ * @return computed inner product ******************************************************************************/ template<> const std::complex NRSMat >::dot(const NRVec > &rhs) const { #ifdef DEBUG if(NN2 != rhs.nn) laerror("incompatible dimensions in std::complex NRSMat >::dot(const NRVec > &)"); #endif std::complex dot(0., 0.); SAME_LOC(*this, rhs); #ifdef CUDALA if(location == cpu){ #endif cblas_zdotc_sub(NN2, v, 1, rhs.v, 1, &dot); #ifdef CUDALA }else{ const cuDoubleComplex _dot = cublasZdotc(NN2, (cuDoubleComplex*)v, 1, (cuDoubleComplex*)rhs.v, 1); TEST_CUBLAS("cublasZdotc"); dot = std::complex(cuCreal(_dot), cuCimag(_dot)); } #endif return dot; } /***************************************************************************//** * compute the Frobenius norm of this real double-precision symmetric matrix * @param[in] scalar subtract this scalar value from the diagonal elements before the norm computation ******************************************************************************/ template<> const double NRSMat::norm(const double scalar) const { if(!scalar){ double ret(0.); #ifdef CUDALA if(location == cpu){ #endif ret = cblas_dnrm2(NN2, v, 1); #ifdef CUDALA }else{ ret = cublasDnrm2(NN2, v, 1); TEST_CUBLAS("cublasDnrm2"); } #endif return ret; } NOT_GPU(*this); double sum(0.); int k(0); for(register int i=0; i const double NRSMat< std::complex >::norm(const std::complex scalar) const { if(!(scalar.real()) && !(scalar.imag())){ double ret(0.); #ifdef CUDALA if(location == cpu){ #endif ret = cblas_dznrm2(NN2, v, 1); #ifdef CUDALA }else{ ret = cublasDznrm2(NN2, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasDznrm2"); } #endif return ret; } int k(0); double sum(0.); std::complex tmp; for(register int i=0; i void NRSMat::axpy(const double alpha, const NRSMat &x) { #ifdef DEBUG if(nn != x.nn) laerror("incompatible dimensions in void NRSMat::axpy(const double, const NRSMat&)"); #endif SAME_LOC(*this, x); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_daxpy(NN2, alpha, x.v, 1, v, 1); #ifdef CUDALA }else{ cublasDaxpy(NN2, alpha, x.v, 1, v, 1); TEST_CUBLAS("cublasDaxpy"); } #endif } /***************************************************************************//** * for this complex double-precision hermitian matrix \f$H\f$ stored in packed form, * complex scalar value \f$\alpha\f$ and complex double-precision hermitian matrix \f$G\f$, compute * \f[H \leftarrow \alpha G + H\f] ******************************************************************************/ template<> void NRSMat >::axpy(const std::complex alpha, const NRSMat > & x) { #ifdef DEBUG if(nn != x.nn) laerror("incompatible dimensions in void NRSMat >::axpy(const std::complex , const NRSMat >&)"); #endif SAME_LOC(*this, x); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_zaxpy(nn, &alpha, x.v, 1, v, 1); #ifdef CUDALA }else{ const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag()); cublasZaxpy(NN2, _alpha, (cuDoubleComplex*)x.v, 1, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasZaxpy"); } #endif } /***************************************************************************//** * create hermitian matrix \f$H\f$ from given real double-precision symmetric * matrix \f$S\f$ * @param[in] rhs real double-precision symmetric matrix \f$S\f$ * @param[in] imagpart flag determining whether \f$S\f$ should correspond to the real or imaginary part of \f$H\f$ ******************************************************************************/ template<> NRSMat >::NRSMat(const NRSMat &rhs, bool imagpart): nn(rhs.nrows()), count(new int(1)) { //inconsistent in general case? const int nnp1 = nn*(nn + 1)/2; #ifdef CUDALA location = rhs.getlocation(); if(location == cpu){ #endif v = new std::complex[nnp1]; memset(v, 0, nnp1*sizeof(std::complex)); cblas_dcopy(nnp1, &rhs(0, 0), 1, ((double *)v) + (imagpart?1:0), 2); #ifdef CUDALA }else{ v = (std::complex*) gpualloc(nnp1*sizeof(std::complex)); std::complex *_val = gpuputcomplex(CZERO); cublasZcopy(nnp1, (cuDoubleComplex*)_val, 0, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasZcopy"); gpufree(_val); cublasDcopy(nnp1, (double*)(&rhs(0,0)), 1, ((double*)v) + (imagpart?1:0), 2); TEST_CUBLAS("cublasDcopy"); } #endif } /***************************************************************************//** * extract block submatrix * @param[in] from starting position * @param[in] to final position * @return extracted block submatrix ******************************************************************************/ template const NRSMat NRSMat::submatrix(const int from, const int to) const { #ifdef DEBUG if(from<0 || from>=nn|| to<0 || to>=nn || from>to){ laerror("invalid submatrix specification"); } #endif NOT_GPU(*this); const int n = to - from + 1; NRSMat r(n); for(int i=0; i=nn) laerror("bad index in submatrix"); for(int j=0; j<=i; ++j) { int jj= j+from; r(i,j) = (*this)(ii,jj); } } return r; } template const NRSMat NRSMat::submatrix(const NRVec &selection) const { NOT_GPU(*this); const int n = selection.size(); NRSMat r(n); for(int i=0; i=nn) laerror("bad index in submatrix"); for(int j=0; j<=i; ++j) { int jj=selection[j]; r(i,j) = (*this)(ii,jj); } } return r; } /***************************************************************************//** * places given matrix as submatrix at given position * @param[in] fromrow row-coordinate of top left corner * @param[in] fromcol col-coordinate of top left corner * @param[in] rhs input matrix ******************************************************************************/ template void NRSMat::storesubmatrix(const int from, const NRSMat &rhs) { #ifdef DEBUG if(from<0 || from>=nn){ laerror("invalid submatrix specification"); } #endif NOT_GPU(*this); const int n = rhs.nrows(); for(int i=0; i=nn) laerror("bad index in storesubmatrix"); for(int j=0; j<=i; ++j) { int jj= j+from; (*this)(ii,jj) = rhs(i,j); } } } template void NRSMat::storesubmatrix(const NRVec &selection, const NRSMat &rhs) { NOT_GPU(*this); const int n = selection.size(); if(rhs.size()!=n) laerror("size mismatch in storesubmatrix"); for(int i=0; i=nn) laerror("bad index in storesubmatrix"); for(int j=0; j<=i; ++j) { int jj=selection[j]; (*this)(ii,jj) = rhs(i,j); } } } template<> NRSMat NRSMat::inverse() {return NRSMat(NRMat(*this).inverse());} template<> NRSMat > NRSMat >::inverse() {return NRSMat >(NRMat >(*this).inverse());} /***************************************************************************//** * conjugate this general matrix * @return reference to the (unmodified) matrix ******************************************************************************/ template NRSMat& NRSMat::conjugateme() { #ifdef CUDALA if(location != cpu) laerror("general conjugation only on CPU"); #endif for(int i=0; i::conjugate(v[i]); return *this; } /***************************************************************************//** * conjugate this complex matrix * @return reference to the modified matrix ******************************************************************************/ template<> NRSMat >& NRSMat >::conjugateme() { copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_dscal((size_t)NN2, -1.0, ((double *)v) + 1, 2); #ifdef CUDALA }else{ cublasDscal((size_t)NN2, -1.0, ((double *)v) + 1, 2); } #endif return *this; } template<> NRSMat >& NRSMat >::conjugateme() { copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_sscal((size_t)NN2, -1.0, ((float *)v) + 1, 2); #ifdef CUDALA }else{ cublasSscal((size_t)NN2, -1.0, ((float *)v) + 1, 2); } #endif return *this; } /***************************************************************************//** * forced instantization in the corresponding object file ******************************************************************************/ template class NRSMat; template class NRSMat >; //template class NRSMat; //template class NRSMat >; template class NRSMat; template class NRSMat; template class NRSMat; template class NRSMat; template class NRSMat; template class NRSMat; template class NRSMat; template class NRSMat; template class NRSMat; template class NRSMat; }//namespace