NRVec: constructor with GPU location option, otimes2vec
This commit is contained in:
parent
0b91e88dca
commit
ea2b494abb
57
vec.cc
57
vec.cc
@ -704,6 +704,24 @@ const NRMat<double> NRVec<double>::otimes(const NRVec<double> &b,const bool conj
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
const NRVec<double> NRVec<double>::otimes2vec(const NRVec<double> &b,const bool conj, const double &scale) const {
|
||||||
|
|
||||||
|
SAME_LOC(*this, b);
|
||||||
|
NRVec<double> result(0.0, nn*b.nn, this->getlocation());
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
cblas_dger(CblasRowMajor, nn, b.nn, scale, v, 1, b.v, 1, result.v, b.nn);
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
cublasDger(b.nn, nn, scale, b.v, 1, v, 1, result.v, b.nn);
|
||||||
|
TEST_CUBLAS("cublasDger");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/***************************************************************************//**
|
/***************************************************************************//**
|
||||||
* computes the outer product of this complex vector \f$\vec{a}\f$ with given
|
* computes the outer product of this complex vector \f$\vec{a}\f$ with given
|
||||||
* complex vector \f$\vec{b}\f$ and scales the resulting matrix with factor \f$\alpha\f$, i.e.
|
* complex vector \f$\vec{b}\f$ and scales the resulting matrix with factor \f$\alpha\f$, i.e.
|
||||||
@ -750,6 +768,42 @@ NRVec<std::complex<double> >::otimes(const NRVec<std::complex<double> > &b, cons
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
const NRVec<std::complex<double> >
|
||||||
|
NRVec<std::complex<double> >::otimes2vec(const NRVec<std::complex<double> > &b, const bool conj, const std::complex<double> &scale) const {
|
||||||
|
|
||||||
|
SAME_LOC(*this, b);
|
||||||
|
NRVec<std::complex<double> > result(0., nn*b.nn, this->getlocation());
|
||||||
|
|
||||||
|
#ifdef CUDALA
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
if(conj){
|
||||||
|
cblas_zgerc(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result.v, b.nn);
|
||||||
|
}else{
|
||||||
|
cblas_zgeru(CblasRowMajor, nn, b.nn, &scale, v, 1, b.v, 1, result.v, b.nn);
|
||||||
|
}
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
if(conj){
|
||||||
|
const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), -scale.imag());
|
||||||
|
|
||||||
|
cublasZgerc(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result.v), b.nn);
|
||||||
|
TEST_CUBLAS("cublasZgerc");
|
||||||
|
|
||||||
|
result.conjugateme();
|
||||||
|
}else{
|
||||||
|
const cuDoubleComplex alpha = make_cuDoubleComplex(scale.real(), +scale.imag());
|
||||||
|
|
||||||
|
cublasZgeru(b.nn, nn, alpha, (cuDoubleComplex*)(b.v), 1, (cuDoubleComplex*)(v), 1, (cuDoubleComplex*)(result.v), b.nn);
|
||||||
|
TEST_CUBLAS("cublasZgeru");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
NRVec<std::complex<double> > complexify(const NRVec<double> &rhs) {
|
NRVec<std::complex<double> > complexify(const NRVec<double> &rhs) {
|
||||||
NRVec<std::complex<double> > r(rhs.size(), rhs.getlocation());
|
NRVec<std::complex<double> > r(rhs.size(), rhs.getlocation());
|
||||||
@ -988,7 +1042,8 @@ template<> void NRVec<T>::gemv(const T beta, const SparseMat<T> &a, const char t
|
|||||||
template<> void NRVec<T>::gemv(const LA_traits_complex<T>::Component_type beta, const LA_traits_complex<T>::NRMat_Noncomplex_type &a, const char trans, const LA_traits_complex<T>::Component_type alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
|
template<> void NRVec<T>::gemv(const LA_traits_complex<T>::Component_type beta, const LA_traits_complex<T>::NRMat_Noncomplex_type &a, const char trans, const LA_traits_complex<T>::Component_type alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
|
||||||
template<> void NRVec<T>::gemv(const LA_traits_complex<T>::Component_type beta, const LA_traits_complex<T>::NRSMat_Noncomplex_type &a, const char trans, const LA_traits_complex<T>::Component_type alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
|
template<> void NRVec<T>::gemv(const LA_traits_complex<T>::Component_type beta, const LA_traits_complex<T>::NRSMat_Noncomplex_type &a, const char trans, const LA_traits_complex<T>::Component_type alpha, const NRVec<T> &x) { laerror("gemv on unsupported types"); } \
|
||||||
template<> NRVec<T> & NRVec<T>::normalize(LA_traits<T>::normtype *) {laerror("normalize() impossible for integer types"); return *this;} \
|
template<> NRVec<T> & NRVec<T>::normalize(LA_traits<T>::normtype *) {laerror("normalize() impossible for integer types"); return *this;} \
|
||||||
template<> const NRMat<T> NRVec<T>::otimes(const NRVec<T> &b,const bool conj, const T &scale) const {laerror("otimes presently implemented only for double and complex double"); return NRMat<T> ();}
|
template<> const NRMat<T> NRVec<T>::otimes(const NRVec<T> &b,const bool conj, const T &scale) const {laerror("otimes presently implemented only for double and complex double"); return NRMat<T> ();}\
|
||||||
|
template<> const NRVec<T> NRVec<T>::otimes2vec(const NRVec<T> &b,const bool conj, const T &scale) const {laerror("otimes2vec presently implemented only for double and complex double"); return NRVec<T> ();}\
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
33
vec.h
33
vec.h
@ -130,7 +130,8 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
//! inlined constructor creating vector of given size filled with prescribed value
|
//! inlined constructor creating vector of given size filled with prescribed value
|
||||||
inline NRVec(const T &a, const int n);
|
//inline NRVec(const T &a, const int n);
|
||||||
|
inline NRVec(const T &a, const int n, const GPUID loc = undefined);
|
||||||
|
|
||||||
//! inlined constructor creating vector from an array
|
//! inlined constructor creating vector from an array
|
||||||
template<int SIZE> inline NRVec(const T (&a)[SIZE]);
|
template<int SIZE> inline NRVec(const T (&a)[SIZE]);
|
||||||
@ -356,9 +357,13 @@ public:
|
|||||||
|
|
||||||
//! compute the outer product of two vectors
|
//! compute the outer product of two vectors
|
||||||
const NRMat<T> otimes(const NRVec<T> &rhs, const bool conjugate = false, const T &scale = 1) const;
|
const NRMat<T> otimes(const NRVec<T> &rhs, const bool conjugate = false, const T &scale = 1) const;
|
||||||
|
|
||||||
//! operator for outer product computation
|
//! operator for outer product computation
|
||||||
// forwards to otimes(rhs, true), i.e. the outer product with the conjugate flag set and the default scale
inline const NRMat<T> operator|(const NRVec<T> &rhs) const { return otimes(rhs,true); };
|
// forwards to otimes(rhs, true), i.e. the outer product with the conjugate flag set and the default scale
inline const NRMat<T> operator|(const NRVec<T> &rhs) const { return otimes(rhs,true); };
|
||||||
|
|
||||||
|
//! compute the outer product of two vectors, result interpreted as a vector
|
||||||
|
const NRVec otimes2vec(const NRVec<T> &rhs, const bool conjugate = false, const T &scale = 1) const;
|
||||||
|
|
||||||
//! compute the sum of the vector elements
|
//! compute the sum of the vector elements
|
||||||
inline const T sum() const {
|
inline const T sum() const {
|
||||||
T sum(v[0]);
|
T sum(v[0]);
|
||||||
@ -678,6 +683,8 @@ std::istream & operator>>(std::istream &s, NRVec<T> &x) {
|
|||||||
* @param[in] a value to be assigned to all vector elements
|
* @param[in] a value to be assigned to all vector elements
|
||||||
* @param[in] n required vector size
|
* @param[in] n required vector size
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
|
|
||||||
|
/* replaced by the one with optional GPUID
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline NRVec<T>::NRVec(const T& a, const int n): nn(n), count(new int) {
|
inline NRVec<T>::NRVec(const T& a, const int n): nn(n), count(new int) {
|
||||||
*count = 1;
|
*count = 1;
|
||||||
@ -699,6 +706,30 @@ inline NRVec<T>::NRVec(const T& a, const int n): nn(n), count(new int) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
inline NRVec<T>::NRVec(const T& a, const int n, const GPUID loc): nn(n), count(new int) {
|
||||||
|
*count = 1;
|
||||||
|
#ifdef CUDALA
|
||||||
|
location = (loc==undefined?DEFAULT_LOC:loc);
|
||||||
|
if(location == cpu){
|
||||||
|
#endif
|
||||||
|
v = new T[n];
|
||||||
|
if(!LA_traits<T>::is_plaindata() || a != (T)0){
|
||||||
|
for(register int i=0; i<n; i++) v[i] = a;
|
||||||
|
}else{
|
||||||
|
memset(v, 0, nn*sizeof(T));
|
||||||
|
}
|
||||||
|
#ifdef CUDALA
|
||||||
|
}else{
|
||||||
|
if(sizeof(T)%sizeof(float) != 0)laerror("memory alignment error");
|
||||||
|
v = (T*) gpualloc(n*sizeof(T));
|
||||||
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
||||||
|
smart_gpu_set(n, a, v);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/***************************************************************************//**
|
/***************************************************************************//**
|
||||||
* inline constructor creating vector from an array
|
* inline constructor creating vector from an array
|
||||||
|
Loading…
Reference in New Issue
Block a user