/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
/*******************************************************************************
    LA: linear algebra C++ interface library
    Copyright (C) 2008 Jiri Pittner; complex versions written by Roman Curik

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*******************************************************************************/

//header names below are reconstructed from the routines used in this file
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <cctype>
#include <cmath>
#include <complex>
#include <unistd.h>
#include <errno.h>
#include "vec.h"
#include "qsort.h"
#include <iostream>

namespace LA {

/***************************************************************************//**
 * conversion constructor interpreting a given matrix with \f$N\f$ rows and
 * \f$M\f$ columns of general type T as a vector of \f$N\times{}M\f$ elements
 * @param[in] rhs matrix being converted
 * @see NRMat<T>::NRMat()
 ******************************************************************************/
#ifndef MATPTR
template <typename T>
NRVec<T>::NRVec(const NRMat<T> &rhs) {
#ifdef CUDALA
	location = rhs.location;
#endif
	nn = rhs.nn*rhs.mm;
	v = rhs.v;
	count = rhs.count;
	(*count)++;
}
#endif

/***************************************************************************//**
 * routine for raw output
 * @param[in] fd file descriptor for output
 * @param[in] dim if true, write the vector dimension (plus padding) as a header
 * @param[in] transp reserved
 * @see NRMat<T>::put()
 ******************************************************************************/
template <typename T>
void NRVec<T>::put(int fd, bool dim, bool transp) const {
#ifdef CUDALA
	if(location != cpu){
		NRVec<T> tmp = *this;
		tmp.moveto(cpu);
		tmp.put(fd,dim,transp);
		return;
	}
#endif
	errno = 0;
	int pad(1); //align at least 8-byte
	if(dim){
		if(sizeof(int) != write(fd,&nn,sizeof(int))) laerror("write failed");
		if(sizeof(int) != write(fd,&pad,sizeof(int))) laerror("write failed");
	}
	LA_traits<T>::multiput(nn,fd,v,dim);
}

/***************************************************************************//**
 * routine for raw input
 * @param[in] fd file descriptor for input
 * @param[in] dim if true, read the dimension header and resize; for dim=0 perform copyonwrite
 * @param[in] transp reserved
 * @see NRMat<T>::get(), copyonwrite()
 ******************************************************************************/
template <typename T>
void NRVec<T>::get(int fd, bool dim, bool transp) {
#ifdef CUDALA
	if(location != cpu){
		NRVec<T> tmp;
		tmp.moveto(cpu);
		tmp.get(fd,dim,transp);
		tmp.moveto(location);
		*this = tmp;
		return;
	}
#endif
	int nn0[2]; //align at least 8-byte
	errno = 0;
	if(dim){
		if(2*sizeof(int) != read(fd,&nn0,2*sizeof(int))) laerror("read failed");
		resize(nn0[0]);
	}else{
		copyonwrite();
	}
	LA_traits<T>::multiget(nn,fd,v,dim);
}
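/* Illustrative usage sketch (not part of the library): a raw I/O round trip
 * with put()/get() over a POSIX file descriptor. The file name and open()
 * flags are hypothetical; put(fd,true,...) prepends the dimension header
 * that get(fd,true,...) reads back before resizing.
 *
 *	#include <fcntl.h>
 *	NRVec<double> a(8);
 *	a.randomize(1.0);
 *	int fd = open("vec.bin", O_WRONLY|O_CREAT|O_TRUNC, 0644);
 *	a.put(fd, true, false);		// writes nn + pad, then the raw elements
 *	close(fd);
 *
 *	NRVec<double> b;
 *	fd = open("vec.bin", O_RDONLY);
 *	b.get(fd, true, false);		// reads the header and resizes b to 8
 *	close(fd);
 */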
/***************************************************************************//**
 * routine for formatted output via lawritemat
 * @param[in] file pointer to FILE structure representing the output file
 * @param[in] format format specification in standard printf-like form
 * @param[in] modulo number of elements printed per line
 * @see lawritemat()
 ******************************************************************************/
template <typename T>
void NRVec<T>::fprintf(FILE *file, const char *format, const int modulo) const {
	NOT_GPU(*this);

	lawritemat(file, v, 1, nn, format, 1, modulo, 0);
}

/***************************************************************************//**
 * routine for formatted input via fscanf
 * @param[in] f pointer to FILE structure representing the input file
 * @param[in] format format specification in standard printf-like form
 ******************************************************************************/
template <typename T>
void NRVec<T>::fscanf(FILE *f, const char *format) {
	int n(0);
	NOT_GPU(*this);

	if(::fscanf(f, "%d", &n) != 1) laerror("can not read vector dimension");
	resize(n);
	for(register int i=0; i<n; i++){
		if(::fscanf(f, format, v + i) != 1) laerror("can not read vector element");
	}
}

/***************************************************************************//**
 * unary minus operator for a double-precision vector
 * @return the negated vector by value
 ******************************************************************************/
template<>
const NRVec<double> NRVec<double>::operator-() const {
	NRVec<double> result(*this);
	result.copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_dscal(result.nn, -1.0, result.v, 1);
#ifdef CUDALA
	}else{
		cublasDscal(result.nn, -1.0, result.v, 1);
		TEST_CUBLAS("cublasDscal");
	}
#endif
	return result;
}

/***************************************************************************//**
 * unary minus operator for a complex double-precision vector
 * @return the negated vector by value
 ******************************************************************************/
template<>
const NRVec<std::complex<double> > NRVec<std::complex<double> >::operator-() const {
	NRVec<std::complex<double> > result(*this);
	result.copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_zdscal(result.nn, -1.0, result.v, 1);
#ifdef CUDALA
	}else{
		cublasZdscal(result.nn, -1.0, (cuDoubleComplex*)result.v, 1);
		TEST_CUBLAS("cublasZdscal");
	}
#endif
	return result;
}

/***************************************************************************//**
 * unary minus operator for a vector of general type
 * @return the negated vector
 ******************************************************************************/
template <typename T>
const NRVec<T> NRVec<T>::operator-() const {
	NOT_GPU(*this);
	NRVec<T> result(nn, getlocation());
	for(register int i=0; i<nn; i++) result.v[i] = -v[i];
	return result;
}

/***************************************************************************//**
 * comparison operator (lexicographical order)
 * @param[in] rhs vector intended for comparison
 * @return
 * \li \c true current vector is bigger than vector \c rhs
 * \li \c false otherwise
 ******************************************************************************/
template <typename T>
const bool NRVec<T>::operator>(const NRVec<T> &rhs) const {
	int n(nn);

	SAME_LOC(*this, rhs);
	NOT_GPU(*this);

	if(rhs.nn < n) n = rhs.nn;
	for(register int i=0; i<n; i++){
		if(LA_traits<T>::bigger(v[i], rhs.v[i])) return true;
		if(LA_traits<T>::smaller(v[i], rhs.v[i])) return false;
	}
	return nn>rhs.nn;
}

/***************************************************************************//**
 * comparison operator (lexicographical order)
 * @param[in] rhs vector intended for comparison
 * @return
 * \li \c true current vector is smaller than vector \c rhs
 * \li \c false otherwise
 ******************************************************************************/
template <typename T>
const bool NRVec<T>::operator<(const NRVec<T> &rhs) const {
	int n(nn);

	SAME_LOC(*this, rhs);
	NOT_GPU(*this);

	if(rhs.nn < n) n = rhs.nn;
	for(register int i=0; i<n; i++){
		if(LA_traits<T>::smaller(v[i], rhs.v[i])) return true;
		if(LA_traits<T>::bigger(v[i], rhs.v[i])) return false;
	}
	return nn<rhs.nn;
}

/***************************************************************************//**
 * fill the real vector with pseudorandom numbers generated using random()
 * @param[in] x generate uniform pseudorandom numbers in the interval [-x,x)
 ******************************************************************************/
template<>
void NRVec<double>::randomize(const double &x){
	NOT_GPU(*this);

	for(register int i=0; i<nn; ++i){
		v[i] = x*(2.*random()/(1. + RAND_MAX) - 1.);
	}
}

/***************************************************************************//**
 * fill the complex vector with pseudorandom numbers generated using random();
 * the real and imaginary parts are generated independently
 * @param[in] x generate uniform pseudorandom numbers in the interval [-x,x)
 ******************************************************************************/
template<>
void NRVec<std::complex<double> >::randomize(const double &x) {
	NOT_GPU(*this);

	for(register int i=0; i<nn; ++i){
		v[i] = std::complex<double>(x*(2.*random()/(1. + RAND_MAX) - 1.), x*(2.*random()/(1. + RAND_MAX) - 1.));
	}
}
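/* Illustrative sketch (not part of the library): the comparison operators
 * order vectors lexicographically and fall back to the lengths when one
 * vector is a prefix of the other; randomize() fills with uniform deviates
 * in [-x,x).
 *
 *	NRVec<int> u(2), w(3);
 *	u[0] = 1; u[1] = 2;
 *	w[0] = 1; w[1] = 2; w[2] = 0;
 *	bool t = u < w;		// true: u is a proper prefix of w, hence smaller
 */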
/***************************************************************************//**
 * constructor creating a complex vector from a real one
 * @param[in] rhs the real vector being converted into the complex one
 * @param[in] imagpart
 * \li \c true vector \c rhs is interpreted as the imaginary part of the new complex vector
 * \li \c false vector \c rhs is interpreted as the real part of the new complex vector
 ******************************************************************************/
template<>
NRVec<std::complex<double> >::NRVec(const NRVec<double> &rhs, bool imagpart): nn(rhs.size()){
	count = new int;
	*count = 1;
#ifdef CUDALA
	location = rhs.getlocation();
	if(location == cpu){
#endif
		v = new std::complex<double>[nn];
		memset(v, 0, nn*sizeof(std::complex<double>));
		//interleaved storage: copy rhs into either the real or the imaginary components
		cblas_dcopy(nn, &rhs[0], 1, ((double *)v) + (imagpart?1:0), 2);
#ifdef CUDALA
	}else{
		v = (std::complex<double>*) gpualloc(nn*sizeof(std::complex<double>));

		cublasZscal(nn, CUZERO, (cuDoubleComplex*)v, 1);
		TEST_CUBLAS("cublasZscal");

		cublasDcopy(nn, &rhs[0], 1, ((double *)v) + (imagpart?1:0), 2);
		TEST_CUBLAS("cublasDcopy");
	}
#endif
}
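/* Illustrative sketch (not part of the library): building a complex vector
 * from a real one. The stride-2 dcopy above writes the real data into either
 * the real parts (imagpart=false) or the imaginary parts (imagpart=true) of
 * the zero-initialized complex storage.
 *
 *	NRVec<double> re(4);
 *	re = 1.0;
 *	NRVec<std::complex<double> > z(re, false);	// z[i] == (1,0)
 *	NRVec<std::complex<double> > w(re, true);	// w[i] == (0,1)
 */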
/***************************************************************************//**
 * perform the axpy operation on the current real vector \f$\vec{v}\f$, i.e.
 * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f]
 * @param[in] alpha double-precision real parameter \f$\alpha\f$
 * @param[in] x double-precision real vector \f$\vec{x}\f$
 ******************************************************************************/
template<>
void NRVec<double>::axpy(const double alpha, const NRVec<double> &x) {
#ifdef DEBUG
	if (nn != x.nn) laerror("incompatible vectors");
#endif
	SAME_LOC(*this, x);
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_daxpy(nn, alpha, x.v, 1, v, 1);
#ifdef CUDALA
	}else{
		cublasDaxpy(nn, alpha, x.v, 1, v, 1);
		TEST_CUBLAS("cublasDaxpy");
	}
#endif
}

/***************************************************************************//**
 * perform the axpy operation on the current complex vector \f$\vec{v}\f$, i.e.
 * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f]
 * @param[in] alpha double-precision complex parameter \f$\alpha\f$
 * @param[in] x double-precision complex vector \f$\vec{x}\f$
 ******************************************************************************/
template<>
void NRVec<std::complex<double> >::axpy(const std::complex<double> alpha, const NRVec<std::complex<double> > &x){
#ifdef DEBUG
	if (nn != x.nn) laerror("incompatible vectors");
#endif
	SAME_LOC(*this, x);
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_zaxpy(nn, &alpha, x.v, 1, v, 1);
#ifdef CUDALA
	}else{
		const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
		cublasZaxpy(nn, _alpha, (cuDoubleComplex*)x.v, 1, (cuDoubleComplex*)v, 1);
		TEST_CUBLAS("cublasZaxpy");
	}
#endif
}

/***************************************************************************//**
 * perform the axpy operation on the current real vector \f$\vec{v}\f$, i.e.
 * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f]
 * @param[in] alpha double-precision real parameter \f$\alpha\f$
 * @param[in] x pointer to double-precision real data
 * @param[in] stride stride between consecutive elements of the source data
 ******************************************************************************/
template<>
void NRVec<double>::axpy(const double alpha, const double *x, const int stride){
	NOT_GPU(*this);
	copyonwrite();

	cblas_daxpy(nn, alpha, x, stride, v, 1);
}

/***************************************************************************//**
 * perform the axpy operation on the current complex vector \f$\vec{v}\f$, i.e.
 * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f]
 * @param[in] alpha double-precision complex parameter \f$\alpha\f$
 * @param[in] x pointer to double-precision complex data
 * @param[in] stride stride between consecutive elements of the source data
 ******************************************************************************/
template<>
void NRVec<std::complex<double> >::axpy(const std::complex<double> alpha, const std::complex<double> *x, const int stride){
	NOT_GPU(*this);
	copyonwrite();

	cblas_zaxpy(nn, &alpha, x, stride, v, 1);
}

/***************************************************************************//**
 * assign a real scalar value to every element of the current vector
 * @param[in] a scalar value to be assigned
 * @return reference to the modified vector
 ******************************************************************************/
template<>
NRVec<double>& NRVec<double>::operator=(const double &a){
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_dcopy(nn, &a, 0, v, 1);
#ifdef CUDALA
	}else{
		smart_gpu_set(nn, a, v);
	}
#endif
	return *this;
}

/***************************************************************************//**
 * assign a complex scalar value to every element of the current vector
 * @param[in] a scalar value to be assigned
 * @return reference to the modified vector
 ******************************************************************************/
template<>
NRVec<std::complex<double> >& NRVec<std::complex<double> >::operator=(const std::complex<double> &a){
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_zcopy(nn, &a, 0, v, 1);
#ifdef CUDALA
	}else{
		smart_gpu_set(nn, a, v);
	}
#endif
	return *this;
}

/***************************************************************************//**
 * assign a scalar value to every element of the current vector of general type T
 * @param[in] a scalar value to be assigned
 * @return reference to the modified vector
 ******************************************************************************/
template <typename T>
NRVec<T>& NRVec<T>::operator=(const T &a){
	NOT_GPU(*this);
	copyonwrite();

	if(a != (T)0){
		for(register int i=0; i<nn; i++) v[i] = a;
	}else{
		memset(v, 0, nn*sizeof(T));
	}
	return *this;
}

/***************************************************************************//**
 * normalize the current real vector (in the Euclidean norm)
 * @param[in] norm if not NULL, the norm of this vector is stored into *norm
 * @return reference to the modified vector
 ******************************************************************************/
template<>
NRVec<double>& NRVec<double>::normalize(double *norm){
	double tmp(0.0);
#ifdef CUDALA
	if(location == cpu){
#endif
		tmp = cblas_dnrm2(nn, v, 1);
		if(norm) *norm = tmp;
#ifdef DEBUG
		if(!tmp) laerror("attempt to normalize zero vector");
#endif
		copyonwrite();
		tmp = 1.0 / tmp;
		cblas_dscal(nn, tmp, v, 1);
#ifdef CUDALA
	}else{
		tmp = cublasDnrm2(nn, v, 1);
		TEST_CUBLAS("cublasDnrm2");

		if(norm) *norm = tmp;
#ifdef DEBUG
		if(!tmp) laerror("attempt to normalize zero vector");
#endif
		copyonwrite();
		tmp = 1.0 / tmp;
		cublasDscal(nn, tmp, v, 1);
		TEST_CUBLAS("cublasDscal");
	}
#endif
	return *this;
}
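/* Illustrative sketch (not part of the library): axpy() accumulates alpha*x
 * into the current vector in place, and normalize() rescales to unit
 * Euclidean norm, optionally returning the original norm.
 *
 *	NRVec<double> x(3), y(3);
 *	x = 1.0; y = 2.0;
 *	y.axpy(0.5, x);		// y[i] = 2.0 + 0.5*1.0 = 2.5
 *	double nrm;
 *	y.normalize(&nrm);	// nrm == sqrt(3.)*2.5, y now has unit norm
 */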
/***************************************************************************//**
 * normalize the current complex vector (in the Euclidean norm)
 * @param[in] norm if not NULL, the norm of this vector is stored into *norm
 * @return reference to the modified vector
 ******************************************************************************/
template<>
NRVec<std::complex<double> >& NRVec<std::complex<double> >::normalize(double *norm){
	double tmp(0.0);
#ifdef CUDALA
	if(location == cpu){
#endif
		tmp = cblas_dznrm2(nn, v, 1);
		if(norm) *norm = tmp;
#ifdef DEBUG
		if(tmp == 0.0) laerror("attempt to normalize zero vector");
#endif
		copyonwrite();
		tmp = 1.0 / tmp;
		cblas_zdscal(nn, tmp, v, 1);
#ifdef CUDALA
	}else{
		tmp = cublasDznrm2(nn, (cuDoubleComplex*)v, 1);
		TEST_CUBLAS("cublasDznrm2");

		if(norm) *norm = tmp;
#ifdef DEBUG
		if(tmp == 0.0) laerror("attempt to normalize zero vector");
#endif
		copyonwrite();
		tmp = 1.0 / tmp;
		cublasZdscal(nn, tmp, (cuDoubleComplex*)v, 1);
		TEST_CUBLAS("cublasZdscal");
	}
#endif
	return *this;
}

/***************************************************************************//**
 * perform the \b gemv operation on this real vector \f$\vec{y}\f$, i.e.
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 * @param[in] beta real parameter \f$\beta\f$
 * @param[in] A real matrix \f$A\f$
 * @param[in] trans if trans == 'n' use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$
 * @param[in] alpha real parameter \f$\alpha\f$
 * @param[in] x real vector \f$\vec{x}\f$
 * @see NRMat<T>::gemm
 ******************************************************************************/
template<>
void NRVec<double>::gemv(const double beta, const NRMat<double> &A, const char trans, const double alpha, const NRVec<double> &x) {
#ifdef DEBUG
	if((tolower(trans) == 'n'?A.ncols():A.nrows()) != x.size()){
		laerror("incompatible vectors");
	}
#endif
	SAME_LOC3(*this, x, A);
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_dgemv(CblasRowMajor, (tolower(trans)=='n' ? CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), alpha, A, A.ncols(), x.v, 1, beta, v, 1);
#ifdef CUDALA
	}else{
		//CUBLAS assumes column-major storage while NRMat is row-major: swap the dimensions and flip the transposition flag
		cublasDgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), alpha, A, A.ncols(), x.v, 1, beta, v, 1);
		TEST_CUBLAS("cublasDgemv");
	}
#endif
}
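/* Illustrative sketch (not part of the library): gemv() computes
 * y = alpha*op(A)*x + beta*y; passing beta = 0 overwrites y with A*x.
 *
 *	NRMat<double> A(2, 3);
 *	NRVec<double> x(3), y(2);
 *	A = 1.0; x = 1.0; y = 0.0;
 *	y.gemv(0.0, A, 'n', 1.0, x);	// y = A*x, each element 3.0
 */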
/***************************************************************************//**
 * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e.
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 * @param[in] beta real parameter \f$\beta\f$
 * @param[in] A real matrix \f$A\f$
 * @param[in] trans if trans == 'n' use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$
 * @param[in] alpha real parameter \f$\alpha\f$
 * @param[in] x complex vector \f$\vec{x}\f$
 * @see gemm
 ******************************************************************************/
template<>
void NRVec<std::complex<double> >::gemv(const double beta, const NRMat<double> &A, const char trans, const double alpha, const NRVec<std::complex<double> > &x) {
#ifdef DEBUG
	if ((tolower(trans) == 'n'?A.ncols():A.nrows()) != x.size()){
		laerror("incompatible vectors");
	}
#endif
	SAME_LOC3(*this, x, A);
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		//apply the real matrix separately to the real and to the imaginary parts (stride 2 over the interleaved storage)
		cblas_dgemv(CblasRowMajor, (tolower(trans)=='n'?CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), alpha, A, A.ncols(), (double *)x.v, 2, beta, (double *)v, 2);
		cblas_dgemv(CblasRowMajor, (tolower(trans)=='n'?CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), alpha, A, A.ncols(), ((double *)x.v) + 1, 2, beta, ((double *)v) + 1, 2);
#ifdef CUDALA
	}else{
		cublasDgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), alpha, A, A.ncols(), (double*)(x.v), 2, beta, (double *)v, 2);
		TEST_CUBLAS("cublasDgemv");

		cublasDgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), alpha, A, A.ncols(), ((double *)x.v) + 1, 2, beta, ((double *)v) + 1, 2);
		TEST_CUBLAS("cublasDgemv");
	}
#endif
}

/***************************************************************************//**
 * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e.
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 * @param[in] beta complex parameter \f$\beta\f$
 * @param[in] A complex matrix \f$A\f$
 * @param[in] trans if trans == 'n' use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$
 * @param[in] alpha complex parameter \f$\alpha\f$
 * @param[in] x complex vector \f$\vec{x}\f$
 * @see gemm
 ******************************************************************************/
template<>
void NRVec<std::complex<double> >::gemv(const std::complex<double> beta, const NRMat<std::complex<double> > &A, const char trans, const std::complex<double> alpha, const NRVec<std::complex<double> > &x) {
#ifdef DEBUG
	if ((tolower(trans) == 'n'?A.ncols():A.nrows()) != x.size()){
		laerror("incompatible vectors");
	}
#endif
	SAME_LOC3(*this, x, A);
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_zgemv(CblasRowMajor, (tolower(trans)=='n'?CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), &alpha, A, A.ncols(), x.v, 1, &beta, v, 1);
#ifdef CUDALA
	}else{
		const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
		const cuDoubleComplex _beta  = make_cuDoubleComplex(beta.real(), beta.imag());

		cublasZgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), _alpha, (cuDoubleComplex*)(A[0]), A.ncols(), (cuDoubleComplex*)(x.v), 1, _beta, (cuDoubleComplex*)v, 1);
		TEST_CUBLAS("cublasZgemv");
	}
#endif
}
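/* Note on the real-matrix variant above: a real matrix acting on a complex
 * vector is implemented as two real gemv calls with stride 2, one over the
 * real parts ((double*)x.v) and one over the imaginary parts
 * ((double*)x.v + 1), avoiding a promotion of A to complex storage.
 * Illustrative sketch:
 *
 *	NRMat<double> A(2, 2);
 *	NRVec<std::complex<double> > x(2), y(2);
 *	A = 1.0;
 *	x = std::complex<double>(1.0, 2.0);
 *	y.gemv(0.0, A, 'n', 1.0, x);	// each y[i] == (2,4)
 */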
/***************************************************************************//**
 * perform the \b gemv operation on this real vector \f$\vec{y}\f$, i.e.
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 * @param[in] beta real parameter \f$\beta\f$
 * @param[in] A real symmetric matrix \f$A\f$ stored in packed form
 * @param[in] trans ignored, since \f$A\f$ is symmetric
 * @param[in] alpha real parameter \f$\alpha\f$
 * @param[in] x real vector \f$\vec{x}\f$
 * @see gemm, NRSMat<T>
 ******************************************************************************/
template<>
void NRVec<double>::gemv(const double beta, const NRSMat<double> &A, const char trans, const double alpha, const NRVec<double> &x) {
#ifdef DEBUG
	if(A.ncols() != x.size()){
		laerror("incompatible dimensions");
	}
#endif
	SAME_LOC3(*this, A, x);
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, x.v, 1, beta, v, 1);
#ifdef CUDALA
	}else{
		//the row-major packed lower triangle coincides with the column-major packed upper triangle expected by CUBLAS
		cublasDspmv('U', A.ncols(), alpha, A, x.v, 1, beta, v, 1);
		TEST_CUBLAS("cublasDspmv");
	}
#endif
}

/***************************************************************************//**
 * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e.
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 * @param[in] beta real parameter \f$\beta\f$
 * @param[in] A real symmetric matrix \f$A\f$ stored in packed form
 * @param[in] trans ignored, since \f$A\f$ is symmetric
 * @param[in] alpha real parameter \f$\alpha\f$
 * @param[in] x complex vector \f$\vec{x}\f$
 * @see gemm, NRSMat<T>
 ******************************************************************************/
template<>
void NRVec<std::complex<double> >::gemv(const double beta, const NRSMat<double> &A, const char trans, const double alpha, const NRVec<std::complex<double> > &x) {
#ifdef DEBUG
	if(A.ncols() != x.size()){
		laerror("incompatible dimensions");
	}
#endif
	SAME_LOC3(*this, A, x);
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		//apply the real packed matrix separately to the real and to the imaginary parts
		cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, (double *)x.v, 2, beta, (double *)v, 2);
		cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, ((double *)x.v) + 1, 2, beta, ((double *)v) + 1, 2);
#ifdef CUDALA
	}else{
		cublasDspmv('U', A.ncols(), alpha, A, (double*)(x.v), 2, beta, (double*)v, 2);
		TEST_CUBLAS("cublasDspmv");

		cublasDspmv('U', A.ncols(), alpha, A, ((double*)(x.v)) + 1, 2, beta, ((double*)v) + 1, 2);
		TEST_CUBLAS("cublasDspmv");
	}
#endif
}
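/* Illustrative sketch (not part of the library): gemv with an NRSMat uses
 * the packed-storage BLAS routine dspmv, so only one triangle of the
 * symmetric matrix is stored; the scalar assignment to S below assumes
 * NRSMat supports it analogously to NRVec.
 *
 *	NRSMat<double> S(3);	// 3x3 symmetric, 6 stored elements
 *	NRVec<double> x(3), y(3);
 *	S = 1.0; x = 1.0;
 *	y.gemv(0.0, S, 'n', 1.0, x);	// y = S*x, each element 3.0
 */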
/***************************************************************************//**
 * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e.
 * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f]
 * @param[in] beta complex parameter \f$\beta\f$
 * @param[in] A complex Hermitian matrix \f$A\f$ stored in packed form
 * @param[in] trans not used
 * @param[in] alpha complex parameter \f$\alpha\f$
 * @param[in] x complex vector \f$\vec{x}\f$
 * @see gemm, NRSMat<T>
 ******************************************************************************/
template<>
void NRVec<std::complex<double> >::gemv(const std::complex<double> beta, const NRSMat<std::complex<double> > &A, const char trans, const std::complex<double> alpha, const NRVec<std::complex<double> > &x){
#ifdef DEBUG
	if(A.ncols() != x.size()) laerror("incompatible dimensions");
#endif
	SAME_LOC3(*this, A, x);
	copyonwrite();
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_zhpmv(CblasRowMajor, CblasLower, A.ncols(), &alpha, A, x.v, 1, &beta, v, 1);
#ifdef CUDALA
	}else{
		const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag());
		const cuDoubleComplex _beta  = make_cuDoubleComplex(beta.real(), beta.imag());

		cublasZhpmv('U', A.ncols(), _alpha, (cuDoubleComplex*)((const std::complex<double>*)A), (cuDoubleComplex*)(x.v), 1, _beta, (cuDoubleComplex*)(this->v), 1);
		TEST_CUBLAS("cublasZhpmv");
	}
#endif
}

/***************************************************************************//**
 * computes the outer product of this real vector \f$\vec{a}\f$ with a given
 * real vector \f$\vec{b}\f$ and scales the resulting matrix by the factor \f$\alpha\f$, i.e.
 * the elements of the final matrix \f$A\f$ are
 * \f[A_{i,j} = \alpha\cdot\vec{a}_i\vec{b}_j\f]
 * @param[in] b real vector \f$\vec{b}\f$
 * @param[in] conj not used
 * @param[in] scale real factor \f$\alpha\f$
 ******************************************************************************/
template<>
const NRMat<double> NRVec<double>::otimes(const NRVec<double> &b, const bool conj, const double &scale) const {
	SAME_LOC(*this, b);
	NRMat<double> result(0.0, nn, b.nn, this->getlocation());
#ifdef CUDALA
	if(location == cpu){
#endif
		cblas_dger(CblasRowMajor, nn, b.nn, scale, v, 1, b.v, 1, result, b.nn);
#ifdef CUDALA
	}else{
		//column-major CUBLAS: the rank-1 update with a and b swapped yields the row-major a*b^T
		cublasDger(b.nn, nn, scale, b.v, 1, v, 1, result[0], b.nn);
		TEST_CUBLAS("cublasDger");
	}
#endif
	return result;
}
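/* Illustrative sketch (not part of the library): otimes() forms the scaled
 * outer product A = alpha * a b^T via the BLAS rank-1 update dger.
 *
 *	NRVec<double> a(2), b(3);
 *	a = 2.0; b = 1.0;
 *	NRMat<double> A = a.otimes(b, false, 0.5);	// A(i,j) == 0.5*2*1 == 1
 */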
/***************************************************************************//**
 * computes the outer product of this complex vector \f$\vec{a}\f$ with a given
 * complex vector \f$\vec{b}\f$ and scales the resulting matrix by the factor \f$\alpha\f$, i.e.
 * the elements of the final matrix \f$A\f$ are
 * \f[A_{i,j} = \alpha\cdot\vec{a}_i\vec{b}_j\f]
 * in case conj = true, the result is
 * \f[A_{i,j} = \alpha\cdot\vec{a}_i\vec{b}_j^{*}\f]
 * @param[in] b complex vector \f$\vec{b}\f$
 * @param[in] conj determines whether the vector \f$\vec{b}\f$ is conjugated
 * @param[in] scale complex scaling factor \f$\alpha\f$
 ******************************************************************************/
template<>
const NRMat<std::complex<double> > NRVec<std::complex<double> >::otimes(const NRVec<std::complex<double> > &b, const bool conj, const std::complex<double> &scale) const {
	SAME_LOC(*this, b);
	NRMat<std::complex<double> > result(0., nn, b.nn, this->getlocation());
#ifdef CUDALA
	if(location == cpu){
#endif
		if(conj){
			cblas_zgerc(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result, b.nn);
		}else{
			cblas_zgeru(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result, b.nn);
		}
#ifdef CUDALA
	}else{
		if(conj){
			//compute the conjugated update in column-major terms, then conjugate the result in place
			const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), -scale.imag());
			cublasZgerc(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), b.nn);
			TEST_CUBLAS("cublasZgerc");
			result.conjugateme();
		}else{
			const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), +scale.imag());
			cublasZgeru(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), b.nn);
			TEST_CUBLAS("cublasZgeru");
		}
	}
#endif
	return result;
}

/***************************************************************************//**
 * sort the vector (or a part of it), optionally recording the applied permutation
 * @param[in] direction sort direction, passed as the template flag of memqsort
 * @param[in] from index of the first element of the sorted range
 * @param[in] to index of the last element of the sorted range (-1 means the whole vector)
 * @param[out] perm if not NULL, the applied permutation is stored here
 ******************************************************************************/
template <typename T>
int NRVec<T>::sort(int direction, int from, int to, int *perm) {
	NOT_GPU(*this);
	copyonwrite();

	if(to == -1) to = nn - 1;
	if(direction) return memqsort<1, NRVec<T>, int, int>(*this, perm, from, to);
	else return memqsort<0, NRVec<T>, int, int>(*this, perm, from, to);
}

template <typename T>
int NRVec<T>::sort(int direction, NRPerm<int> &perm) {
	if(nn != perm.size()) laerror("incompatible vector and permutation");
	perm.identity();
	int r = sort(direction, 0, nn-1, &perm[1]);
	return r;
}

template<>
NRVec<std::complex<double> > complexify(const NRVec<double> &rhs) {
	NRVec<std::complex<double> > r(rhs.size(), rhs.getlocation());
#ifdef CUDALA
	if(rhs.getlocation() == cpu){
#endif
		cblas_dcopy(rhs.size(), &rhs[0], 1, (double *)(&r[0]), 2);
#ifdef CUDALA
	}else{
		r = 0;
		cublasDcopy(rhs.size(), rhs.v, 1, (double*)(r.v), 2);
		TEST_CUBLAS("cublasDcopy");
	}
#endif
	return r;
}

/***************************************************************************//**
 * create a new vector with the elements reordered according to the given permutation
 * @param[in] p permutation (1-based, as NRPerm)
 * @param[in] inverse if true, apply the inverse of \c p
 ******************************************************************************/
template <typename T>
const NRVec<T> NRVec<T>::permuted(const NRPerm<int> &p, const bool inverse) const {
#ifdef DEBUG
	if(!p.is_valid()) laerror("invalid permutation of vector");
#endif
	int n = p.size();
	if(n != (*this).size()) laerror("incompatible permutation and vector");
#ifdef CUDALA
	if(this->getlocation() != cpu || p.getlocation() != cpu) laerror("permutations can be done only in CPU memory");
#endif
	NRVec<T> r(n);
	if(inverse) for(int i=1; i<=n; ++i) r[i-1] = v[p[i]-1];
	else for(int i=1; i<=n; ++i) r[p[i]-1] = v[i-1];
	return r;
}

/***************************************************************************//**
 * permute the vector in place according to a permutation given in cycle format
 * @param[in] p permutation expressed as a product of cycles
 ******************************************************************************/
template <typename T>
void NRVec<T>::permuteme(const CyclePerm<int> &p) {
#ifdef DEBUG
	if(!p.is_valid()) laerror("invalid permutation of vector");
#endif
	if(p.max() > nn) laerror("incompatible permutation and vector");
#ifdef CUDALA
	if(this->getlocation() != cpu || p.getlocation() != cpu) laerror("permutations can be done only in CPU memory");
#endif
	copyonwrite();
	for(int cycle=1; cycle<=p.size(); ++cycle) {
		int length = p[cycle].size();
		if(length <= 1) continue; //trivial cycle
		T tmp = v[p[cycle][length]-1];
		for(int i=length; i>1; --i) v[p[cycle][i]-1] = v[p[cycle][i-1]-1];
		v[p[cycle][1]-1] = tmp;
	}
}
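/* Illustrative sketch (not part of the library): sort() optionally records
 * the permutation it applied (note the 1-based NRPerm indexing), and
 * permuted() applies a permutation out of place; constructing NRPerm<int>
 * with a size is assumed here.
 *
 *	NRVec<int> s(3);
 *	s[0] = 3; s[1] = 1; s[2] = 2;
 *	NRPerm<int> p(3);
 *	s.sort(0, p);			// one memqsort direction; p records the reordering
 *	NRVec<int> t = s.permuted(p);	// reshuffles s according to p
 */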
/***************************************************************************//**
 * forced instantiation in the corresponding object file
 ******************************************************************************/
/* Commented out by Roman for ICC

#define INSTANTIZE(T) \
template void NRVec<T>::put(int fd, bool dim, bool transp) const; \
template void NRVec<T>::get(int fd, bool dim, bool transp); \

INSTANTIZE(double)
INSTANTIZE(std::complex<double>)
INSTANTIZE(char)
INSTANTIZE(short)
INSTANTIZE(int)
INSTANTIZE(long)
INSTANTIZE(long long)
INSTANTIZE(unsigned char)
INSTANTIZE(unsigned short)
INSTANTIZE(unsigned int)
INSTANTIZE(unsigned long)
INSTANTIZE(unsigned long long)
*/

#define INSTANTIZE_DUMMY(T) \
template<> void NRVec<T>::gemv(const T beta, const NRMat<T> &a, const char trans, const T alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
template<> void NRVec<T>::gemv(const T beta, const NRSMat<T> &a, const char trans, const T alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
template<> void NRVec<T>::gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec<T> &x, bool s) { laerror("gemv on unsupported types"); } \
template<> void NRVec<T>::gemv(const LA_traits_complex<T>::Component_type beta, const LA_traits_complex<T>::NRMat_Noncomplex_type &a, const char trans, const LA_traits_complex<T>::Component_type alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
template<> void NRVec<T>::gemv(const LA_traits_complex<T>::Component_type beta, const LA_traits_complex<T>::NRSMat_Noncomplex_type &a, const char trans, const LA_traits_complex<T>::Component_type alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
template<> NRVec<T> & NRVec<T>::normalize(LA_traits<T>::normtype *) {laerror("normalize() impossible for integer types"); return *this;} \
template<> const NRMat<T> NRVec<T>::otimes(const NRVec<T> &b, const bool conj, const T &scale) const {laerror("otimes presently implemented only for double and complex double"); return NRMat<T>();}

// Roman
// the following gemv are not implemented
template<> void NRVec<double>::gemv(const double beta, const SparseMat<double> &a, const char trans, const double alpha, const NRVec<double> &x, bool s) { laerror("gemv on unsupported types"); }
template<> void NRVec<std::complex<double> >::gemv(const std::complex<double> beta, const SparseMat<std::complex<double> > &a, const char trans, const std::complex<double> alpha, const NRVec<std::complex<double> > &x, bool s) { laerror("gemv on unsupported types"); }

INSTANTIZE_DUMMY(char)
INSTANTIZE_DUMMY(short)
INSTANTIZE_DUMMY(int)
INSTANTIZE_DUMMY(long)
INSTANTIZE_DUMMY(long long)
INSTANTIZE_DUMMY(unsigned char)
INSTANTIZE_DUMMY(unsigned short)
INSTANTIZE_DUMMY(unsigned int)
INSTANTIZE_DUMMY(unsigned long)
INSTANTIZE_DUMMY(unsigned long long)
INSTANTIZE_DUMMY(std::complex<char>)
INSTANTIZE_DUMMY(std::complex<short>)
INSTANTIZE_DUMMY(std::complex<int>)
INSTANTIZE_DUMMY(std::complex<long>)
INSTANTIZE_DUMMY(std::complex<long long>)
INSTANTIZE_DUMMY(std::complex<unsigned char>)
INSTANTIZE_DUMMY(std::complex<unsigned short>)
INSTANTIZE_DUMMY(std::complex<unsigned int>)
INSTANTIZE_DUMMY(std::complex<unsigned long>)
INSTANTIZE_DUMMY(std::complex<unsigned long long>)
INSTANTIZE_DUMMY(std::complex<std::complex<double> >)
INSTANTIZE_DUMMY(std::complex<std::complex<float> >)

//also not supported on gpu
#define INSTANTIZE_NONCOMPLEX(T) \
template<>\
const T NRVec<T>::max() const\
{\
	if(nn==0) return 0;\
	T m = v[0];\
	for(int i=1; i<nn; ++i) if(v[i] > m) m = v[i];\
	return m;\
}\
\
template<>\
const T NRVec<T>::min() const\
{\
	if(nn==0) return 0;\
	T m = v[0];\
	for(int i=1; i<nn; ++i) if(v[i] < m) m = v[i];\
	return m;\
}

INSTANTIZE_NONCOMPLEX(double)
INSTANTIZE_NONCOMPLEX(char)
INSTANTIZE_NONCOMPLEX(short)
INSTANTIZE_NONCOMPLEX(int)
INSTANTIZE_NONCOMPLEX(long)
INSTANTIZE_NONCOMPLEX(long long)
INSTANTIZE_NONCOMPLEX(unsigned char)
INSTANTIZE_NONCOMPLEX(unsigned short)
INSTANTIZE_NONCOMPLEX(unsigned int)
INSTANTIZE_NONCOMPLEX(unsigned long)
INSTANTIZE_NONCOMPLEX(unsigned long long)

template class NRVec<double>;
template class NRVec<std::complex<double> >;
template class NRVec<char>;
template class NRVec<short>;
template class NRVec<int>;
template class NRVec<long>;
template class NRVec<long long>;
template class NRVec<unsigned char>;
template class NRVec<unsigned short>;
template class NRVec<unsigned int>;
template class NRVec<unsigned long>;
template class NRVec<unsigned long long>;

}//namespace