diff --git a/vec.cc b/vec.cc index a048009..8a457d6 100644 --- a/vec.cc +++ b/vec.cc @@ -704,6 +704,24 @@ const NRMat NRVec::otimes(const NRVec &b,const bool conj return result; } +template<> +const NRVec NRVec::otimes2vec(const NRVec &b,const bool conj, const double &scale) const { + + SAME_LOC(*this, b); + NRVec result(0.0, nn*b.nn, this->getlocation()); +#ifdef CUDALA + if(location == cpu){ +#endif + cblas_dger(CblasRowMajor, nn, b.nn, scale, v, 1, b.v, 1, result.v, b.nn); +#ifdef CUDALA + }else{ + cublasDger(b.nn, nn, scale, b.v, 1, v, 1, result.v, b.nn); + TEST_CUBLAS("cublasDger"); + } +#endif + return result; +} + /***************************************************************************//** * computes the outer product of this complex vector \f$\vec{a}\f$ with given * complex vector \f$\vec{b}\f$ and scales the resulting matrix with factor \f$\alpha\f$, i.e. @@ -750,6 +768,42 @@ NRVec >::otimes(const NRVec > &b, cons return result; } +template<> +const NRVec > +NRVec >::otimes2vec(const NRVec > &b, const bool conj, const std::complex &scale) const { + + SAME_LOC(*this, b); + NRVec > result(0., nn*b.nn, this->getlocation()); + +#ifdef CUDALA + if(location == cpu){ +#endif + if(conj){ + cblas_zgerc(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result.v, b.nn); + }else{ + cblas_zgeru(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result.v, b.nn); + } +#ifdef CUDALA + }else{ + if(conj){ + const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), -scale.imag()); + + cublasZgerc(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result.v), b.nn); + TEST_CUBLAS("cublasZgerc"); + + result.conjugateme(); + }else{ + const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), +scale.imag()); + + cublasZgeru(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result.v), b.nn); + TEST_CUBLAS("cublasZgeru"); + } + } +#endif + return result; +} + + template<> NRVec > complexify(const NRVec &rhs) { NRVec > r(rhs.size(), rhs.getlocation()); @@ -988,7 +1042,8 @@ template<> void NRVec::gemv(const T beta, const SparseMat &a, const char t template<> void NRVec::gemv(const LA_traits_complex::Component_type beta, const LA_traits_complex::NRMat_Noncomplex_type &a, const char trans, const LA_traits_complex::Component_type alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ template<> void NRVec::gemv(const LA_traits_complex::Component_type beta, const LA_traits_complex::NRSMat_Noncomplex_type &a, const char trans, const LA_traits_complex::Component_type alpha, const NRVec &x) { laerror("gemv on unsupported types"); } \ template<> NRVec & NRVec::normalize(LA_traits::normtype *) {laerror("normalize() impossible for integer types"); return *this;} \ -template<> const NRMat NRVec::otimes(const NRVec &b,const bool conj, const T &scale) const {laerror("otimes presently implemented only for double and complex double"); return NRMat ();} +template<> const NRMat NRVec::otimes(const NRVec &b,const bool conj, const T &scale) const {laerror("otimes presently implemented only for double and complex double"); return NRMat ();}\ +template<> const NRVec NRVec::otimes2vec(const NRVec &b,const bool conj, const T &scale) const {laerror("otimes2vec presently implemented only for double and complex double"); return NRVec ();}\ diff --git a/vec.h b/vec.h index c043007..d68fb38 100644 --- a/vec.h +++ b/vec.h @@ -130,7 +130,8 @@ public: }; //! inlined constructor creating vector of given size filled with prescribed value - inline NRVec(const T &a, const int n); + //inline NRVec(const T &a, const int n); + inline NRVec(const T &a, const int n, const GPUID loc = undefined); //! inlined constructor creating vector froman array template inline NRVec(const T (&a)[SIZE]); @@ -356,9 +357,13 @@ public: //! compute the outer product of two vectors const NRMat otimes(const NRVec &rhs, const bool conjugate = false, const T &scale = 1) const; + //! opeartor for outer product computation inline const NRMat operator|(const NRVec &rhs) const { return otimes(rhs,true); }; + //! compute the outer product of two vectors, result interpreted as a vector + const NRVec otimes2vec(const NRVec &rhs, const bool conjugate = false, const T &scale = 1) const; + //! compute the sum of the vector elements inline const T sum() const { T sum(v[0]); @@ -678,6 +683,8 @@ std::istream & operator>>(std::istream &s, NRVec &x) { * @param[in] a value to be assigned to all vector elements * @param[in] n required vector size ******************************************************************************/ + +/* replaced by the one with optional GPUID template inline NRVec::NRVec(const T& a, const int n): nn(n), count(new int) { *count = 1; @@ -699,6 +706,30 @@ inline NRVec::NRVec(const T& a, const int n): nn(n), count(new int) { } #endif } +*/ + +template +inline NRVec::NRVec(const T& a, const int n, const GPUID loc): nn(n), count(new int) { + *count = 1; +#ifdef CUDALA + location = (loc==undefined?DEFAULT_LOC:loc); + if(location == cpu){ +#endif + v = new T[n]; + if(!LA_traits::is_plaindata() || a != (T)0){ + for(register int i=0; i::is_plaindata()) laerror("only implemented for plain data"); + smart_gpu_set(n, a, v); + } +#endif +} /***************************************************************************//** * inline constructor creating vector from an array