/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */ /******************************************************************************* LA: linear algebra C++ interface library Copyright (C) 2008 Jiri Pittner or complex versions written by Roman Curik This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . *******************************************************************************/ #include #include #include #include #include #include #include #include "vec.h" #include #include "vecmat3.h" namespace LA { /***************************************************************************//** * conversion constructor interpreting a given matrix with \f$N\f$ rows and * \f$M\f$ columns of general type T as a vector of \f$N\times{}M\f$ * elements * @param[in] rhs matrix being converted * @see NRMat::NRMat() ******************************************************************************/ #ifndef MATPTR template NRVec::NRVec(const NRMat &rhs) { #ifdef CUDALA location = rhs.location; #endif nn = rhs.nn*rhs.mm; v = rhs.v; count = rhs.count; (*count)++; } #endif /***************************************************************************//** * routine for formatted output via lawritemat * @param[in] file pointer to FILE structure representing the output file * @param[in] format format specification in standard printf-like form * @param[in] modulo * @see lawritemat() ******************************************************************************/ template void NRVec::fprintf(FILE *file, const char *format, const int modulo) const { NOT_GPU(*this); lawritemat(file, v, 1, nn, format, 1, modulo, 0); } /***************************************************************************//** * routine for formatted input via fscanf * @param[in] f pointer to FILE structure representing the input file * @param[in] format format specification in standard printf-like form ******************************************************************************/ template void NRVec::fscanf(FILE *f, const char *format) { int n(0); NOT_GPU(*this); if(::fscanf(f, "%d", &n) != 1) laerror("can not read vector dimension"); resize(n); for(register int i=0; i const NRVec NRVec::operator-() const { NRVec result(*this); result.copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_dscal(nn, -1.0, result.v, 1); #ifdef CUDALA }else{ cublasDscal(nn, -1.0, result.v, 1); TEST_CUBLAS("cublasDscal"); } #endif return result; } /***************************************************************************//** * unary minus operator in case of complex double-precision vector * @return the modified vector by value ******************************************************************************/ template<> const NRVec > NRVec >::operator-() const { NRVec > result(*this); result.copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_zdscal(nn, -1.0, result.v, 1); #ifdef CUDALA }else{ cublasZdscal(nn, -1.0, (cuDoubleComplex*)result.v, 1); TEST_CUBLAS("cublasZdscal"); } #endif return result; } /***************************************************************************//** * unary minus operator for vector of general type * @return the modified vector ******************************************************************************/ template const NRVec NRVec::operator-() const { NOT_GPU(*this); NRVec result(nn, getlocation()); for(register int i=0; i const bool NRVec::operator>(const NRVec &rhs) const { int n(nn); SAME_LOC(*this, rhs); NOT_GPU(*this); if(rhs.nn < n) n = rhs.nn; for(register int i=0; i::bigger(v[i], rhs.v[i])) return true; if(LA_traits::smaller(v[i], rhs.v[i])) return false; } return nn>rhs.nn; } /***************************************************************************//** * comparison operator (lexicographical order) * @param[in] rhs vector intended for comparison * @return * \li \c false current vector is bigger than vector \c rhs * \li \c true current vector is smaller than vector \c rhs ******************************************************************************/ template const bool NRVec::operator<(const NRVec &rhs) const { int n(nn); SAME_LOC(*this, rhs); NOT_GPU(*this); if(rhs.nn < n) n = rhs.nn; for(register int i=0; i::smaller(v[i], rhs.v[i])) return true; if(LA_traits::bigger(v[i], rhs.v[i])) return false; } return nn void NRVec::randomize(const double &x){ NOT_GPU(*this); for(register int i=0; i void NRVec >::randomize(const double &x) { NOT_GPU(*this); for(register int i=0; i(x*RANDDOUBLESIGNED(), x*RANDDOUBLESIGNED()); } } /***************************************************************************//** * constructor creating complex vector from a real one * @param[in] rhs the real vector being converted into the complex one * @param[in] imagpart * \li \c true vector \c rhs is interpreted as the imaginary part of the new complex vector * \li \c false vector \c rhs is interpreted as the real part of the new complex vector * @return * \li \c false current vector is bigger than vector \c rhs * \li \c true current vector is smaller than vector \c rhs ******************************************************************************/ template<> NRVec >::NRVec(const NRVec &rhs, bool imagpart): nn(rhs.size()){ count = new int; *count = 1; #ifdef CUDALA location = rhs.getlocation(); if(location == cpu){ #endif v = (std::complex*)new std::complex[nn]; memset(v, 0, nn*sizeof(std::complex)); cblas_dcopy(nn, &rhs[0], 1, ((double *)v) + (imagpart?1:0), 2); #ifdef CUDALA }else{ v = (std::complex*) gpualloc(nn*sizeof(std::complex)); cublasZscal(nn, CUZERO, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasZscal"); cublasDcopy(nn, &rhs[0], 1, ((double *)v) + (imagpart?1:0), 2); TEST_CUBLAS("cublasDcopy"); } #endif } /***************************************************************************//** * perform the axpy operation on the current real vector \f$\vec{v}\f$, i.e. * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f] * @param[in] alpha double-precision real parameter \f$\alpha\f$ * @param[in] x double-precision real vector \f$\vec{x}\f$ ******************************************************************************/ template<> void NRVec::axpy(const double alpha, const NRVec &x) { #ifdef DEBUG if (nn != x.nn) laerror("incompatible vectors"); #endif SAME_LOC(*this, x); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_daxpy(nn, alpha, x.v, 1, v, 1); #ifdef CUDALA }else{ cublasDaxpy(nn, alpha, x.v, 1, v, 1); TEST_CUBLAS("cublasDaxpy"); } #endif } /***************************************************************************//** * perform the axpy operation on the current complex vector \f$\vec{v}\f$, i.e. * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f] * @param[in] alpha \f$\alpha\f$ parameter * @param[in] x complex vector \f$\vec{x}\f$ ******************************************************************************/ template<> void NRVec >::axpy(const std::complex alpha, const NRVec > &x){ #ifdef DEBUG if (nn != x.nn) laerror("incompatible vectors"); #endif SAME_LOC(*this, x); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_zaxpy(nn, &alpha, x.v, 1, v, 1); #ifdef CUDALA }else{ const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag()); cublasZaxpy(nn, _alpha, (cuDoubleComplex*)x.v, 1, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasZaxpy"); } #endif } /***************************************************************************//** * perform the axpy operation on the current real vector \f$\vec{v}\f$, i.e. * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f] * @param[in] alpha \f$\alpha\f$ parameter * @param[in] x pointer to double-precision real data * @param[in] stride sets the stride ******************************************************************************/ template<> void NRVec::axpy(const double alpha, const double *x, const int stride){ NOT_GPU(*this); copyonwrite(); cblas_daxpy(nn, alpha, x, stride, v, 1); } /***************************************************************************//** * perform the axpy operation on the current complex vector \f$\vec{v}\f$, i.e. * \f[ \vec{v} \leftarrow \vec{v} + \alpha\vec{x} \f] * @param[in] alpha double-precision complex parameter \f$\alpha\f$ * @param[in] x pointer to double-precision complex data * @param[in] stride sets the stride ******************************************************************************/ template<> void NRVec >::axpy(const std::complex alpha, const std::complex *x, const int stride){ NOT_GPU(*this); copyonwrite(); cblas_zaxpy(nn, &alpha, x, stride, v, 1); } /***************************************************************************//** * assign real scalar value to every element of the current vector * @param[in] a scalar value to be assigned * @return reference to the modified vector ******************************************************************************/ template<> NRVec& NRVec::operator=(const double &a){ copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_dcopy(nn, &a, 0, v, 1); #ifdef CUDALA }else{ smart_gpu_set(nn, (double)0, v); } #endif return *this; } /***************************************************************************//** * assign complex scalar value to every element of the current vector * @param[in] a scalar value to be assigned * @return reference to the modified vector ******************************************************************************/ template<> NRVec >& NRVec >::operator=(const std::complex &a){ copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_zcopy(nn, &a, 0, v, 1); #ifdef CUDALA }else{ smart_gpu_set(nn, (std::complex)0, v); } #endif return *this; } /***************************************************************************//** * assign scalar value to every element of the current vector of general type T * @param[in] a scalar value to be assigned * @return reference to the modified vector ******************************************************************************/ template NRVec& NRVec::operator=(const T &a){ NOT_GPU(*this); copyonwrite(); if(!LA_traits::is_plaindata() || a != (T)0){ for(register int i=0; i NRVec& NRVec::normalize(double *norm){ double tmp(0.0); #ifdef CUDALA if(location == cpu){ #endif tmp = cblas_dnrm2(nn, v, 1); if(norm) *norm = tmp; #ifdef DEBUG if(!tmp) laerror("attempt to normalize zero vector"); #endif copyonwrite(); tmp = 1.0 / tmp; cblas_dscal(nn, tmp, v, 1); #ifdef CUDALA }else{ tmp = cublasDnrm2(nn, v, 1); TEST_CUBLAS("cublasDnrm2"); if(norm) *norm = tmp; #ifdef DEBUG if(!tmp) laerror("attempt to normalize zero vector"); #endif copyonwrite(); tmp = 1.0 / tmp; cublasDscal(nn, tmp, v, 1); TEST_CUBLAS("cublasDscal"); } #endif return *this; } /***************************************************************************//** * normalize current complex vector (in the Euclidean norm) * @param[in] norm if not NULL, the norm of this vector is stored into *norm * @return reference to the modified vector ******************************************************************************/ template<> NRVec > & NRVec >::normalize(double *norm){ double tmp(0.0); #ifdef CUDALA if(location == cpu){ #endif tmp = cblas_dznrm2(nn, v, 1); if(norm) *norm = tmp; #ifdef DEBUG if(tmp == 0.0) laerror("attempt to normalize zero vector"); #endif copyonwrite(); tmp = 1.0 / tmp; cblas_zdscal(nn, tmp, v, 1); #ifdef CUDALA }else{ tmp = cublasDznrm2(nn, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasDznrm2"); if(norm) *norm = tmp; #ifdef DEBUG if(tmp == 0.0) laerror("attempt to normalize zero vector"); #endif copyonwrite(); tmp = 1.0 / tmp; cublasZdscal(nn, tmp, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasZdscal"); } #endif return *this; } /***************************************************************************//** * perform the \b gemv operation on this real vector \f$\vec{y}\f$, i.e. * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f] * @param[in] beta real parameter \f$\beta\f$ * @param[in] A real matrix \f$A\f$ * @param[in] trans if trans == 'n' use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$ * @param[in] alpha real parameter \f$\alpha\f$ * @param[in] x real vector \f$\vec{x}\f$ * @see NRMat::gemm ******************************************************************************/ template<> void NRVec::gemv(const double beta, const NRMat &A, const char trans, const double alpha, const NRVec &x) { #ifdef DEBUG if((tolower(trans) == 'n'?A.ncols():A.nrows()) != x.size()){ laerror("incompatible vectors"); } #endif SAME_LOC3(*this, x, A); copyonwrite(); #ifdef CUDALA if(location==cpu){ #endif cblas_dgemv(CblasRowMajor, (tolower(trans)=='n' ? CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), alpha, A, A.ncols(), x.v, 1, beta, v, 1); #ifdef CUDALA }else{ cublasDgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), alpha, A, A.ncols(), x.v, 1, beta, v, 1); TEST_CUBLAS("cublasDgemv"); } #endif } /***************************************************************************//** * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e. * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f] * @param[in] beta real parameter \f$\beta\f$ * @param[in] A real matrix \f$A\f$ * @param[in] trans if trans == 'n' use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$ * @param[in] alpha real parameter \f$\alpha\f$ * @param[in] x real vector \f$\vec{x}\f$ * @see gemm ******************************************************************************/ template<> void NRVec >::gemv(const double beta, const NRMat &A, const char trans, const double alpha, const NRVec > &x) { #ifdef DEBUG if ((tolower(trans) == 'n'?A.ncols():A.nrows()) != x.size()){ laerror("incompatible vectors"); } #endif SAME_LOC3(*this, x, A); copyonwrite(); #ifdef CUDALA if(location==cpu){ #endif cblas_dgemv(CblasRowMajor, (tolower(trans)=='n'?CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), alpha, A, A.ncols(), (double *)x.v, 2, beta, (double *)v, 2); cblas_dgemv(CblasRowMajor, (tolower(trans)=='n'?CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), alpha, A, A.ncols(), ((double *)x.v) + 1, 2, beta, ((double *)v)+1, 2); #ifdef CUDALA }else{ cublasDgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), alpha, A, A.ncols(), (double*)(x.v), 2, beta, (double *)v, 2); TEST_CUBLAS("cublasDgemv"); cublasDgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), alpha, A, A.ncols(), ((double *)x.v) + 1, 2, beta, ((double *)v)+1, 2); TEST_CUBLAS("cublasDgemv"); } #endif } /***************************************************************************//** * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e. * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f] * @param[in] beta complex parameter \f$\beta\f$ * @param[in] A complex matrix \f$A\f$ * @param[in] trans if trans == 'n' use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$ * @param[in] alpha complex parameter \f$\alpha\f$ * @param[in] x real vector \f$\vec{x}\f$ * @see gemm ******************************************************************************/ template<> void NRVec >::gemv(const std::complex beta, const NRMat > &A, const char trans, const std::complex alpha, const NRVec > &x) { #ifdef DEBUG if ((tolower(trans) == 'n'?A.ncols():A.nrows()) != x.size()){ laerror("incompatible vectors"); } #endif SAME_LOC3(*this, x, A); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_zgemv(CblasRowMajor, (tolower(trans)=='n'?CblasNoTrans:CblasTrans), A.nrows(), A.ncols(), &alpha, A, A.ncols(), x.v, 1, &beta, v, 1); #ifdef CUDALA }else{ const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag()); const cuDoubleComplex _beta = make_cuDoubleComplex(beta.real(), beta.imag()); cublasZgemv((tolower(trans)=='n'?'T':'N'), A.ncols(), A.nrows(), _alpha, (cuDoubleComplex*)(A[0]), A.ncols(), (cuDoubleComplex*)(x.v), 1, _beta, (cuDoubleComplex*)v, 1); TEST_CUBLAS("cublasZgemv"); } #endif } /***************************************************************************//** * perform the \b gemv operation on this real vector \f$\vec{y}\f$, i.e. * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f] * @param[in] beta real parameter \f$\beta\f$ * @param[in] A real symmetric matrix \f$A\f$ stored in packed form * @param[in] trans if trans == 'n' use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$ * @param[in] alpha real parameter \f$\alpha\f$ * @param[in] x real vector \f$\vec{x}\f$ * @see gemm, NRSMat ******************************************************************************/ template<> void NRVec::gemv(const double beta, const NRSMat &A, const char trans, const double alpha, const NRVec &x) { #ifdef DEBUG if(A.ncols() != x.size()){ laerror("incompatible dimensions"); } #endif SAME_LOC3(*this, A, x); copyonwrite(); #ifdef CUDALA if(location==cpu){ #endif cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, x.v, 1, beta, v, 1); #ifdef CUDALA }else{ cublasDspmv('U', A.ncols(), alpha, A, x.v, 1, beta, v, 1); TEST_CUBLAS("cublasDspmv"); } #endif } /***************************************************************************//** * perform the \c gemv operation on this complex vector \f$\vec{y}\f$, i.e. * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f] * @param[in] beta real parameter \f$\beta\f$ * @param[in] A real symmetric matrix \f$A\f$ stored in packed form * @param[in] trans if trans == 'n' use \f$A\f$ directly, otherwise \f$\operatorname{op}(A)\equiv{}A^\mathrm{T}\f$ * @param[in] alpha real parameter \f$\alpha\f$ * @param[in] x complex vector \f$\vec{x}\f$ * @see gemm, NRSMat ******************************************************************************/ template<> void NRVec >::gemv(const double beta, const NRSMat &A, const char trans, const double alpha, const NRVec > &x) { #ifdef DEBUG if(A.ncols() != x.size()){ laerror("incompatible dimensions"); } #endif SAME_LOC3(*this, A, x); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, (double *)x.v, 2, beta, (double *)v, 2); cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, ((double *)x.v) + 1, 2, beta, ((double *)v) + 1, 2); #ifdef CUDALA }else{ cublasDspmv('U', A.ncols(), alpha, A, (double*)(x.v), 2, beta, (double*)v, 2); TEST_CUBLAS("cublasDspmv"); cublasDspmv('U', A.ncols(), alpha, A, ((double*)(x.v)) + 1, 2, beta, ((double*)v) + 1, 2); TEST_CUBLAS("cublasDspmv"); } #endif } /***************************************************************************//** * perform the \b gemv operation on this complex vector \f$\vec{y}\f$, i.e. * \f[\vec{y}\leftarrow \alpha\operatorname{op}(A)\cdot\vec{x}+\beta\vec{y}\f] * @param[in] beta complex parameter \f$\beta\f$ * @param[in] A complex Hermitian matrix \f$A\f$ stored in packed form * @param[in] trans not used * @param[in] alpha complex parameter \f$\alpha\f$ * @param[in] x complex vector \f$\vec{x}\f$ * @see gemm, NRSMat ******************************************************************************/ template<> void NRVec >::gemv(const std::complex beta, const NRSMat > &A, const char trans, const std::complex alpha, const NRVec > &x){ #ifdef DEBUG if(A.ncols() != x.size()) laerror("incompatible dimensions"); #endif SAME_LOC3(*this, A, x); copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_zhpmv(CblasRowMajor, CblasLower, A.ncols(), &alpha, A, x.v, 1, &beta, v, 1); #ifdef CUDALA }else{ const cuDoubleComplex _alpha = make_cuDoubleComplex(alpha.real(), alpha.imag()); const cuDoubleComplex _beta = make_cuDoubleComplex(beta.real(), beta.imag()); cublasZhpmv('U', A.ncols(), _alpha, (cuDoubleComplex*)((const std::complex*)A), (cuDoubleComplex*)(x.v), 1, _beta, (cuDoubleComplex*)(this->v), 1); TEST_CUBLAS("cublasZhpmv"); } #endif } /***************************************************************************//** * computes the outer product of this real vector \f$\vec{a}\f$ with given * real vector \f$\vec{b}\f$ and scales the resulting matrix with factor \f$\alpha\f$, i.e. * the matrix elements of the final matrix \f$A\f$ can be expressed as * \f[A_{i,j} = \alpha\cdot\vec{a}_i\vec{b}_j\f] * @param[in] b real vector \f$\vec{b}\f$ * @param[in] conj not used * @param[in] scale real factor \f$\alpha\f$ ******************************************************************************/ template<> const NRMat NRVec::otimes(const NRVec &b,const bool conj, const double &scale) const { SAME_LOC(*this, b); NRMat result(0.0, nn, b.nn, this->getlocation()); #ifdef CUDALA if(location == cpu){ #endif cblas_dger(CblasRowMajor, nn, b.nn, scale, v, 1, b.v, 1, result, b.nn); #ifdef CUDALA }else{ cublasDger(b.nn, nn, scale, b.v, 1, v, 1, result[0], b.nn); TEST_CUBLAS("cublasDger"); } #endif return result; } template<> const NRVec NRVec::otimes2vec(const NRVec &b,const bool conj, const double &scale) const { SAME_LOC(*this, b); NRVec result(0.0, nn*b.nn, this->getlocation()); #ifdef CUDALA if(location == cpu){ #endif cblas_dger(CblasRowMajor, nn, b.nn, scale, v, 1, b.v, 1, result.v, b.nn); #ifdef CUDALA }else{ cublasDger(b.nn, nn, scale, b.v, 1, v, 1, result.v, b.nn); TEST_CUBLAS("cublasDger"); } #endif return result; } /***************************************************************************//** * computes the outer product of this complex vector \f$\vec{a}\f$ with given * complex vector \f$\vec{b}\f$ and scales the resulting matrix with factor \f$\alpha\f$, i.e. * the matrix elements of the final matrix \f$A\f$ can be expressed as * \f[A_{i,j} = \alpha\cdot\vec{a}_i\vec{b}_j\f] * in case conj = true, the result is * \f[A_{i,j} = \alpha\cdot\vec{a}_i\vec{b}_j^{*}\f] * @param[in] b complex vector \f$\vec{b}\f$ * @param[in] conj determines whther the vector \f$\vec{b}\f$ is conjugated * @param[in] scale complex scaling factor \f$\alpha\f$ ******************************************************************************/ template<> const NRMat > NRVec >::otimes(const NRVec > &b, const bool conj, const std::complex &scale) const { SAME_LOC(*this, b); NRMat > result(0., nn, b.nn, this->getlocation()); #ifdef CUDALA if(location == cpu){ #endif if(conj){ cblas_zgerc(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result, b.nn); }else{ cblas_zgeru(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result, b.nn); } #ifdef CUDALA }else{ if(conj){ const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), -scale.imag()); cublasZgerc(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), b.nn); TEST_CUBLAS("cublasZgerc"); result.conjugateme(); }else{ const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), +scale.imag()); cublasZgeru(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result[0]), b.nn); TEST_CUBLAS("cublasZgeru"); } } #endif return result; } template<> const NRVec > NRVec >::otimes2vec(const NRVec > &b, const bool conj, const std::complex &scale) const { SAME_LOC(*this, b); NRVec > result(0., nn*b.nn, this->getlocation()); #ifdef CUDALA if(location == cpu){ #endif if(conj){ cblas_zgerc(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result.v, b.nn); }else{ cblas_zgeru(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result.v, b.nn); } #ifdef CUDALA }else{ if(conj){ const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), -scale.imag()); cublasZgerc(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result.v), b.nn); TEST_CUBLAS("cublasZgerc"); result.conjugateme(); }else{ const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), +scale.imag()); cublasZgeru(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result.v), b.nn); TEST_CUBLAS("cublasZgeru"); } } #endif return result; } template<> NRVec > complexify(const NRVec &rhs) { NRVec > r(rhs.size(), rhs.getlocation()); #ifdef CUDALA if(rhs.getlocation() == cpu){ #endif cblas_dcopy(rhs.size(), &rhs[0], 1, (double *)(&r[0]), 2); #ifdef CUDALA }else{ r = 0; cublasDcopy(rhs.size(), rhs.v, 1, (double*)(r.v), 2); TEST_CUBLAS("cublasDcopy"); } #endif return r; } template void NRVec::permuteme(const CyclePerm &p) { #ifdef DEBUG if(!p.is_valid()) laerror("invalid permutation of vector"); #endif if(p.max()>nn) laerror("incompatible permutation and vector"); #ifdef CUDALA if(this->getlocation() != cpu || p.getlocation() != cpu ) laerror("permutations can be done only in CPU memory"); #endif copyonwrite(); for(int cycle=1; cycle<=p.size(); ++cycle) { int length= p[cycle].size(); if(length<=1) continue; //trivial cycle T tmp = v[p[cycle][length]-1]; for(int i=length; i>1; --i) v[p[cycle][i]-1] = v[p[cycle][i-1]-1]; v[p[cycle][1]-1] = tmp; } } template const int NRVec::find(const T &val) const { for(int i=0; i const int NRVec::findthr(const T &val, const typename LA_traits::normtype &thr) const { for(int i=0; i const NRVec NRVec::subvector(const int from, const int to) const { #ifdef DEBUG if(from<0 || from>=nn|| to<0 || to>=nn || from>to){ laerror("invalid subvector specification"); } #endif const int n = to - from + 1; NRVec r(n, getlocation()); if(!LA_traits::is_plaindata()) laerror("only implemented for plain data"); #ifdef CUDALA if(location == cpu){ #endif memcpy(r.v, v+from, n*sizeof(T)); #ifdef CUDALA }else{ if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem"); cublasScopy(n*sizeof(T)/sizeof(float), (const float *)(v+from), 1, (float*)r.v, 1); TEST_CUBLAS("cublasScopy"); } #endif return r; } template const NRVec NRVec::subvector(const NRVec &selection) const { NOT_GPU(*this); const int n = selection.size(); NRVec r(n); for(int i=0; i=nn) laerror("bad row index in subvector"); r[i] = (*this)[ii]; } return r; } /***************************************************************************//** * places given vector as subvector at given position * @param[in] from coordinate * @param[in] rhs input vector ******************************************************************************/ template void NRVec::storesubvector(const int from, const NRVec &rhs) { const int to = from + rhs.size() - 1; #ifdef DEBUG if(from<0 || from>=nn || to>=nn) laerror("bad indices in storesubvector"); #endif SAME_LOC(*this, rhs); if(!LA_traits::is_plaindata()) laerror("only implemented for plain data"); #ifdef CUDALA if(location == cpu){ #endif memcpy(v+from, rhs.v, rhs.size()*sizeof(T)); #ifdef CUDALA }else{ if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem"); cublasScopy(rhs.size()*sizeof(T)/sizeof(float), (const float *) (rhs.v), 1, (float *)(v + from), 1); } #endif } template void NRVec::storesubvector(const NRVec &selection, const NRVec &rhs) { NOT_GPU(*this); const int n = selection.size(); if(n!=rhs.size()) laerror("size mismatch in storesubvector"); for(int i=0; i=nn) laerror("bad index in storesubvector"); (*this)[ii] = rhs[i]; } } /***************************************************************************//** * conjugate this general vector * @return reference to the (unmodified) matrix ******************************************************************************/ template NRVec& NRVec::conjugateme() { copyonwrite(); #ifdef CUDALA if(location != cpu) laerror("general conjugation only on CPU"); #endif for(int i=0; i::conjugate(v[i]); return *this; } /***************************************************************************//** * conjugate this complex vector * @return reference to the modified matrix ******************************************************************************/ template<> NRVec >& NRVec >::conjugateme() { copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_dscal((size_t)nn, -1.0, ((double *)v) + 1, 2); #ifdef CUDALA }else{ cublasDscal((size_t)nn, -1.0, ((double *)v) + 1, 2); } #endif return *this; } template<> NRVec >& NRVec >::conjugateme() { copyonwrite(); #ifdef CUDALA if(location == cpu){ #endif cblas_sscal((size_t)nn, -1.0, ((float *)v) + 1, 2); #ifdef CUDALA }else{ cublasSscal((size_t)nn, -1.0, ((float *)v) + 1, 2); } #endif return *this; } /***************************************************************************//** * forced instantization in the corespoding object file ******************************************************************************/ /* Commented out by Roman for ICC #define INSTANTIZE(T) \ template void NRVec::put(int fd, bool dim, bool transp) const; \ template void NRVec::get(int fd, bool dim, bool transp); \ INSTANTIZE(double) INSTANTIZE(std::complex) INSTANTIZE(char) INSTANTIZE(short) INSTANTIZE(int) INSTANTIZE(long) INSTANTIZE(long long) INSTANTIZE(unsigned char) INSTANTIZE(unsigned short) INSTANTIZE(unsigned int) INSTANTIZE(unsigned long) INSTANTIZE(unsigned long long) */ #define INSTANTIZE_DUMMY(T) \ template<> void NRVec::gemv(const T beta, const NRMat &a, const char trans, const T alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ template<> void NRVec::gemv(const T beta, const NRSMat &a, const char trans, const T alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ template<> void NRVec::gemv(const T beta, const SparseMat &a, const char trans, const T alpha, const NRVec &x, bool s) { laerror("gemv on unsupported types"); } \ template<> void NRVec::gemv(const LA_traits_complex::Component_type beta, const LA_traits_complex::NRMat_Noncomplex_type &a, const char trans, const LA_traits_complex::Component_type alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ template<> void NRVec::gemv(const LA_traits_complex::Component_type beta, const LA_traits_complex::NRSMat_Noncomplex_type &a, const char trans, const LA_traits_complex::Component_type alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ template<> NRVec & NRVec::normalize(LA_traits::normtype *) {laerror("normalize() impossible for integer types"); return *this;} \ template<> const NRMat NRVec::otimes(const NRVec &b,const bool conj, const T &scale) const {laerror("otimes presently implemented only for double and complex double"); return NRMat ();}\ template<> const NRVec NRVec::otimes2vec(const NRVec &b,const bool conj, const T &scale) const {laerror("otimes2vec presently implemented only for double and complex double"); return NRVec ();}\ INSTANTIZE_DUMMY(char) INSTANTIZE_DUMMY(short) INSTANTIZE_DUMMY(int) INSTANTIZE_DUMMY(long) INSTANTIZE_DUMMY(long long) INSTANTIZE_DUMMY(unsigned char) INSTANTIZE_DUMMY(unsigned short) INSTANTIZE_DUMMY(unsigned int) INSTANTIZE_DUMMY(unsigned long) INSTANTIZE_DUMMY(unsigned long long) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex) INSTANTIZE_DUMMY(std::complex >) INSTANTIZE_DUMMY(std::complex >) //also not supported on gpu #define INSTANTIZE_NONCOMPLEX(T) \ template<>\ const T NRVec::max() const\ {\ if(nn==0) return 0;\ T m=v[0];\ for(int i=1; im) m=v[i];\ return m;\ }\ \ template<>\ const T NRVec::min() const\ {\ if(nn==0) return 0;\ T m=v[0];\ for(int i=1; i \ NRVec NRVec::concat(const NRVec &rhs) const \ { \ if(nn==0) return rhs; \ if(rhs.nn==0) return *this; \ NOT_GPU(*this); \ NOT_GPU(rhs); \ NRVec r(nn+rhs.nn); \ memcpy(r.v,v,nn*sizeof(T)); \ memcpy(r.v+nn,rhs.v,rhs.nn*sizeof(T)); \ return r; \ } \ INSTANTIZE_CONCAT(char) INSTANTIZE_CONCAT(unsigned char) INSTANTIZE_CONCAT(short) INSTANTIZE_CONCAT(unsigned short) INSTANTIZE_CONCAT(int) INSTANTIZE_CONCAT(unsigned int) INSTANTIZE_CONCAT(long) INSTANTIZE_CONCAT(unsigned long) INSTANTIZE_CONCAT(long long) INSTANTIZE_CONCAT(unsigned long long) INSTANTIZE_CONCAT(float) INSTANTIZE_CONCAT(double) INSTANTIZE_CONCAT(std::complex) INSTANTIZE_CONCAT(std::complex) //template class NRVec; //template class NRVec >; template class NRVec; template class NRVec >; template class NRVec; template class NRVec; template class NRVec; template class NRVec; template class NRVec; template class NRVec; template class NRVec; template class NRVec; template class NRVec; template class NRVec; }//namespace