*** empty log message ***

This commit is contained in:
jiri
2010-06-25 15:28:19 +00:00
parent eb0aaf9adf
commit 074c943862
13 changed files with 1938 additions and 464 deletions

714
vec.h
View File

@@ -30,6 +30,9 @@ template <typename T> void lawritemat(FILE *file,const T *a,int r,int c,
// Memory allocated constants for cblas routines
const static complex<double> CONE = 1.0, CMONE = -1.0, CZERO = 0.0;
#ifdef CUDALA
const static cuDoubleComplex CUONE = {1.,0.}, CUMONE = {-1.,0.}, CUZERO = {0.,0.};
#endif
// Macros to construct binary operators +,-,*, from +=, -=, *=
// for 3 cases: X + a, a + X, X + Y
@@ -44,7 +47,7 @@ template<class T> \
#define NRVECMAT_OPER2(E,X) \
template<class T> \
inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
{ return NR##E(*this) X##= a; }
@@ -55,12 +58,32 @@ protected:
int nn;
T *v;
int *count;
#ifdef CUDALA
GPUID location;
#endif
public:
friend class NRSMat<T>;
friend class NRMat<T>;
inline NRVec(): nn(0),v(0),count(0){};
explicit inline NRVec(const int n) : nn(n), v(new T[n]), count(new int(1)) {};
// Default constructor: zero length, no storage and no reference counter.
inline NRVec()
    : nn(0), v(0), count(0)
{
#ifdef CUDALA
    // an empty vector is tagged with the default location
    location = DEFAULT_LOC;
#endif
};
// Construct an uninitialised vector of n elements with a reference count of 1.
// Under CUDALA the storage is placed according to loc (undefined selects
// DEFAULT_LOC); without CUDALA loc is ignored and the storage comes from new[].
explicit inline NRVec(const int n, const GPUID loc= undefined) : nn(n), count(new int(1))
{
#ifdef CUDALA
    location = loc;
    if(location==undefined) location = DEFAULT_LOC;
    if(location!=cpu)
        v= (T*) gpualloc(n*sizeof(T));
    else
#endif
        v= new T[n];
};
inline NRVec(const T &a, const int n);
inline NRVec(const T *a, const int n);
inline NRVec(T *a, const int n, bool skeleton);
@@ -71,6 +94,13 @@ public:
explicit NRVec(const NRMat<T> &rhs) : NRVec(&rhs[0][0],rhs.nrows()*rhs.ncols()) {};
#else
explicit NRVec(const NRMat<T> &rhs);
#endif
#ifdef CUDALA
// CUDALA build: getlocation() reports the stored location member;
// moveto() is only declared here.
inline GPUID getlocation() const {return location;}
void moveto(const GPUID dest);
#else
// Build without CUDALA: the location is always reported as cpu and
// moveto() has an empty body.
inline GPUID getlocation() const {return cpu;}
void moveto(const GPUID dest) {};
#endif
NRVec & operator=(const NRVec &rhs);
NRVec & operator=(const T &a); //assign a to every element
@@ -103,8 +133,8 @@ public:
void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x,const bool treat_as_symmetric=false);
void gemv(const typename LA_traits_complex<T>::Component_type beta, const typename LA_traits_complex<T>::NRMat_Noncomplex_type &a, const char trans, const typename LA_traits_complex<T>::Component_type alpha, const NRVec &x);
void gemv(const typename LA_traits_complex<T>::Component_type beta, const typename LA_traits_complex<T>::NRSMat_Noncomplex_type &a, const char trans, const typename LA_traits_complex<T>::Component_type alpha, const NRVec &x);
const NRVec operator*(const NRMat<T> &mat) const {NRVec<T> result(mat.ncols()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
const NRVec operator*(const NRSMat<T> &mat) const {NRVec<T> result(mat.ncols()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
// Vector * matrix products. Each one allocates a result of mat.ncols() elements
// and fills it through gemv called with beta=0, trans='t', alpha=1 and *this as x.
// For NRMat and NRSMat the result is allocated at mat.getlocation(); the SparseMat
// variant uses the constructor's default location.
const NRVec operator*(const NRMat<T> &mat) const {NRVec<T> result(mat.ncols(),mat.getlocation()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
const NRVec operator*(const NRSMat<T> &mat) const {NRVec<T> result(mat.ncols(),mat.getlocation()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
const NRVec operator*(const SparseMat<T> &mat) const {NRVec<T> result(mat.ncols()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
const NRMat<T> otimes(const NRVec<T> &rhs, const bool conjugate=false, const T &scale=1) const; //outer product
inline const NRMat<T> operator|(const NRVec<T> &rhs) const {return otimes(rhs,true);};
@@ -150,29 +180,58 @@ public:
#include "sparsemat.h"
#include "sparsesmat.h"
namespace LA {
// formatted I/O
// Write a vector as text: the element count on its own line, then the elements
// separated by single spaces and terminated by a newline.
// Under CUDALA a vector that is not on the cpu is first copied and moved to the
// cpu, and that copy is printed instead.
template <typename T>
std::ostream & operator<<(std::ostream &s, const NRVec<T> &x)
{
#ifdef CUDALA
    if(x.getlocation()!=cpu)
        {
        NRVec<T> hostcopy=x;
        hostcopy.moveto(cpu);
        return s<<hostcopy;
        }
#endif
    const int n = x.size();
    s << n << std::endl;
    for(int k=0; k<n; k++)
        {
        s << (typename LA_traits_io<T>::IOtype)x[k];
        s << (k == n-1 ? '\n' : ' ');
        }
    return s;
}
template <typename T>
std::istream & operator>>(std::istream &s, NRVec<T> &x)
{
#ifdef CUDALA
if(x.getlocation()==cpu)
{
#endif
int i,n;
s >> n;
x.resize(n);
typename LA_traits_io<T>::IOtype tmp;
for(i=0; i<n; i++) {s >> tmp; x[i]=tmp;}
return s;
#ifdef CUDALA
}
else
{
NRVec<T> tmp;
tmp.moveto(cpu);
s >> tmp;
tmp.moveto(x.getlocation());
x=tmp;
return s;
}
#endif
}
@@ -180,22 +239,51 @@ std::istream & operator>>(std::istream &s, NRVec<T> &x)
// ctors
// Construct a vector of n elements, each set to the value a.
// The reference count starts at 1. Under CUDALA the storage location is
// DEFAULT_LOC; without CUDALA the storage always comes from new[].
// v is deliberately not allocated in the initialiser list: where it is
// allocated depends on the location, which is only known inside the body.
template <typename T>
inline NRVec<T>::NRVec(const T& a, const int n) : nn(n), count(new int)
{
    *count = 1;
#ifdef CUDALA
    location=DEFAULT_LOC;
    if(location==cpu)
        {
#endif
        v = new T[n];
        if(a != (T)0)
            for(int i=0; i<n; i++)
                v[i] = a;
        else
            memset(v, 0, nn*sizeof(T)); // zero fill done bytewise
#ifdef CUDALA
        }
    else
        {
        v= (T*) gpualloc(n*sizeof(T));
        // NOTE(review): host stride 0 is meant to replicate the single value a
        // into all n elements; confirm cublasSetVector accepts incx == 0.
        cublasSetVector(n,sizeof(T),&a,0,v,1);
        }
#endif
}
// Construct a vector holding a copy of the n elements starting at a.
// a is read as ordinary host memory (memcpy on the cpu path, host side of
// cublasSetVector on the GPU path).
// The reference count is set to 1 before the location is examined, so that
// it is initialised on both the cpu and the GPU path.
template <typename T>
inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), count(new int)
{
    *count = 1;
#ifdef CUDALA
    location=DEFAULT_LOC;
    if(location==cpu)
        {
#endif
        v=new T[n];
        memcpy(v, a, n*sizeof(T));
#ifdef CUDALA
        }
    else
        {
        v= (T*) gpualloc(n*sizeof(T));
        cublasSetVector(n,sizeof(T),a,1,v,1);
        }
#endif
}
template <typename T>
@@ -203,12 +291,28 @@ inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int)
{
if(!skeleton)
{
#ifdef CUDALA
location=DEFAULT_LOC;
if(location==cpu)
{
#endif
v=new T[n];
*count = 1;
memcpy(v, a, n*sizeof(T));
#ifdef CUDALA
}
else
{
v= (T*) gpualloc(n*sizeof(T));
cublasSetVector(n,sizeof(T),a,1,v,1);
}
#endif
}
else
{
#ifdef CUDALA
if(location!=cpu) laerror("NRVec() with skeleton option cannot be on GPU");
#endif
*count = 2;
v=a;
}
@@ -217,6 +321,9 @@ inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int)
template <typename T>
inline NRVec<T>::NRVec(const NRVec<T> &rhs)
{
#ifdef CUDALA
location=rhs.location;
#endif
v = rhs.v;
nn = rhs.nn;
count = rhs.count;
@@ -226,6 +333,9 @@ inline NRVec<T>::NRVec(const NRVec<T> &rhs)
template <typename T>
inline NRVec<T>::NRVec(const NRSMat<T> &rhs)
{
#ifdef CUDALA
location=rhs.location;
#endif
nn = rhs.nn;
nn = NN2;
v = rhs.v;
@@ -233,28 +343,11 @@ inline NRVec<T>::NRVec(const NRSMat<T> &rhs)
(*count)++;
}
// x += a
template<>
inline NRVec<double> & NRVec<double>::operator+=(const double &a)
{
copyonwrite();
cblas_daxpy(nn, 1.0, &a, 0, v, 1);
return *this;
}
template<>
inline NRVec< complex<double> > &
NRVec< complex<double> >::operator+=(const complex<double> &a)
{
copyonwrite();
cblas_zaxpy(nn, &CONE, &a, 0, v, 1);
return *this;
}
//and for general type
// x +/-= a
template <typename T>
inline NRVec<T> & NRVec<T>::operator+=(const T &a)
{
NOT_GPU(*this);
copyonwrite();
int i;
for(i=0; i<nn; ++i) v[i]+=a;
@@ -262,65 +355,26 @@ inline NRVec<T> & NRVec<T>::operator+=(const T &a)
}
// x -= a
template<>
inline NRVec<double> & NRVec<double>::operator-=(const double &a)
{
copyonwrite();
cblas_daxpy(nn, -1.0, &a, 0, v, 1);
return *this;
}
template<>
inline NRVec< complex<double> > &
NRVec< complex<double> >::operator-=(const complex<double> &a)
{
copyonwrite();
cblas_zaxpy(nn, &CMONE, &a, 0, v, 1);
return *this;
}
//and for general type
template <typename T>
inline NRVec<T> & NRVec<T>::operator-=(const T &a)
{
NOT_GPU(*this);
copyonwrite();
int i;
for(i=0; i<nn; ++i) v[i]-=a;
int i;
for(i=0; i<nn; ++i) v[i]-=a;
return *this;
}
// x += x
template<>
inline NRVec<double> & NRVec<double>::operator+=(const NRVec<double> &rhs)
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
copyonwrite();
cblas_daxpy(nn, 1.0, rhs.v, 1, v, 1);
return *this;
}
template<>
inline NRVec< complex<double> > &
NRVec< complex<double> >::operator+=(const NRVec< complex<double> > &rhs)
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
copyonwrite();
cblas_zaxpy(nn, &CONE, rhs.v, 1, v, 1);
return *this;
}
//and for general type
template <typename T>
inline NRVec<T> & NRVec<T>::operator+=(const NRVec<T> &rhs)
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
NOT_GPU(*this);
NOT_GPU(rhs);
#endif
copyonwrite();
int i;
@@ -346,6 +400,8 @@ inline NRVec<T> & NRVec<T>::operator/=(const NRVec<T> &rhs)
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("/= of incompatible vectors");
NOT_GPU(*this);
NOT_GPU(rhs);
#endif
copyonwrite();
int i;
@@ -356,35 +412,13 @@ inline NRVec<T> & NRVec<T>::operator/=(const NRVec<T> &rhs)
// x -= x
template<>
inline NRVec<double> & NRVec<double>::operator-=(const NRVec<double> &rhs)
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
copyonwrite();
cblas_daxpy(nn, -1.0, rhs.v, 1, v, 1);
return *this;
}
template<>
inline NRVec< complex<double> > &
NRVec< complex<double> >::operator-=(const NRVec< complex<double> > &rhs)
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
copyonwrite();
cblas_zaxpy(nn, &CMONE, rhs.v, 1, v, 1);
return *this;
}
//and for general type
template <typename T>
inline NRVec<T> & NRVec<T>::operator-=(const NRVec<T> &rhs)
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
NOT_GPU(*this);
NOT_GPU(rhs);
#endif
copyonwrite();
int i;
@@ -394,27 +428,10 @@ inline NRVec<T> & NRVec<T>::operator-=(const NRVec<T> &rhs)
// x *= a
template<>
inline NRVec<double> & NRVec<double>::operator*=(const double &a)
{
copyonwrite();
cblas_dscal(nn, a, v, 1);
return *this;
}
template<>
inline NRVec< complex<double> > &
NRVec< complex<double> >::operator*=(const complex<double> &a)
{
copyonwrite();
cblas_zscal(nn, &a, v, 1);
return *this;
}
//and for general type
template <typename T>
inline NRVec<T> & NRVec<T>::operator*=(const T &a)
{
NOT_GPU(*this);
copyonwrite();
int i;
for(i=0; i<nn; ++i) v[i]*=a;
@@ -423,33 +440,13 @@ inline NRVec<T> & NRVec<T>::operator*=(const T &a)
// scalar product x.y
template<>
inline const double NRVec<double>::operator*(const NRVec<double> &rhs) const
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("dot of incompatible vectors");
#endif
return cblas_ddot(nn, v, 1, rhs.v, 1);
}
template<>
inline const complex<double>
NRVec< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("dot of incompatible vectors");
#endif
complex<double> dot;
cblas_zdotc_sub(nn, v, 1, rhs.v, 1, &dot);
return dot;
}
template<typename T>
inline const T NRVec<T>::operator*(const NRVec<T> &rhs) const
{
#ifdef DEBUG
if (nn != rhs.nn) laerror("dot of incompatible vectors");
NOT_GPU(*this);
NOT_GPU(rhs);
#endif
T dot = 0;
for(int i=0; i<nn; ++i) dot+= v[i]*rhs.v[i];
@@ -458,28 +455,6 @@ inline const T NRVec<T>::operator*(const NRVec<T> &rhs) const
// Sum of elements
template<>
inline const double NRVec<double>::asum() const
{
return cblas_dasum(nn, v, 1);
}
// Dot product: x * y
template<>
inline const double NRVec<double>::dot(const double *y, const int stride) const
{
return cblas_ddot(nn, y, stride, v, 1);
}
template<>
inline const complex<double>
NRVec< complex<double> >::dot(const complex<double> *y, const int stride) const
{
complex<double> dot;
cblas_zdotc_sub(nn, y, stride, v, 1, &dot);
return dot;
}
// x[i] returns i-th element
template <typename T>
@@ -489,6 +464,7 @@ inline T & NRVec<T>::operator[](const int i)
if(_LA_count_check && *count != 1) laerror("possible lval [] with count > 1");
if(i < 0 || i >= nn) laerror("NRVec out of range");
if(!v) laerror("[] on unallocated NRVec");
NOT_GPU(*this);
#endif
return v[i];
}
@@ -498,6 +474,7 @@ inline const T & NRVec<T>::operator[](const int i) const
#ifdef DEBUG
if(i < 0 || i >= nn) laerror("NRVec out of range");
if(!v) laerror("[] on unallocated NRVec");
NOT_GPU(*this);
#endif
return v[i];
}
@@ -527,29 +504,6 @@ inline NRVec<T>::operator const T*() const
return v;
}
// return norm of the Vec
template<>
inline const double NRVec<double>::norm() const
{
return cblas_dnrm2(nn, v, 1);
}
template<>
inline const double NRVec< complex<double> >::norm() const
{
return cblas_dznrm2(nn, v, 1);
}
// Max element of the array
template<>
inline const double NRVec<double>::amax() const
{
return v[cblas_idamax(nn, v, 1)];
}
template<>
inline const complex<double> NRVec< complex<double> >::amax() const
{
return v[cblas_izamax(nn, v, 1)];
}
// Make Vec unitvector
@@ -576,7 +530,16 @@ NRVec<T>::~NRVec()
{
if(!count) return;
if(--(*count) <= 0) {
if(v) delete[] (v);
if(v)
{
#ifdef CUDALA
if(location==cpu)
#endif
delete[] (v);
#ifdef CUDALA
else gpufree(v);
#endif
}
delete count;
}
}
@@ -591,12 +554,29 @@ void NRVec<T>::copyonwrite()
(*count)--;
count = new int;
*count = 1;
T *newv = new T[nn];
memcpy(newv, v, nn*sizeof(T));
T *newv;
#ifdef CUDALA
if(location==cpu)
{
#endif
newv = new T[nn];
memcpy(newv, v, nn*sizeof(T));
#ifdef CUDALA
}
else
{
newv = (T *) gpualloc(nn*sizeof(T));
if(sizeof(T)%sizeof(float)!=0) laerror("cpu memcpy alignment problem");
cublasScopy(nn*sizeof(T)/sizeof(float),(const float *) v,1,(float *)newv,1);
}
#endif
v = newv;
}
}
// Asignment
template <typename T>
NRVec<T> & NRVec<T>::operator=(const NRVec<T> &rhs)
@@ -606,17 +586,29 @@ NRVec<T> & NRVec<T>::operator=(const NRVec<T> &rhs)
if(count)
if(--(*count) == 0)
{
delete[] v;
#ifdef CUDALA
if(location==cpu)
#endif
delete[] v;
#ifdef CUDALA
else
gpufree(v);
#endif
delete count;
}
v = rhs.v;
nn = rhs.nn;
count = rhs.count;
#ifdef CUDALA
location=rhs.location;
#endif
if(count) (*count)++;
}
return *this;
}
// Resize
template <typename T>
void NRVec<T>::resize(const int n)
@@ -629,7 +621,17 @@ void NRVec<T>::resize(const int n)
if(n==0)
{
if(--(*count) <= 0) {
if(v) delete[] (v);
if(v)
{
#ifdef CUDALA
if(location==cpu)
#endif
delete[] (v);
#ifdef CUDALA
else
gpufree(v);
#endif
}
delete count;
}
count=0;
@@ -648,14 +650,33 @@ void NRVec<T>::resize(const int n)
count = new int;
*count = 1;
nn = n;
v = new T[nn];
#ifdef CUDALA
if(location==cpu)
#endif
v = new T[nn];
#ifdef CUDALA
else
v = (T*) gpualloc(nn*sizeof(T));
#endif
return;
}
// *count = 1 in this branch
if (n != nn) {
nn = n;
delete[] v;
v = new T[nn];
#ifdef CUDALA
if(location==cpu)
#endif
{
delete[] v;
v = new T[nn];
}
#ifdef CUDALA
else
{
gpufree(v);
v = (T*) gpualloc(nn*sizeof(T));
}
#endif
}
}
@@ -664,30 +685,18 @@ void NRVec<T>::resize(const int n)
// Assignment that leaves *this with its own copy of rhs's data rather than
// sharing rhs's storage.
// Implemented as an ordinary (sharing) assignment followed by copyonwrite(),
// so allocation, copying and location handling stay in those two routines
// instead of being repeated here with raw new[]/delete[]/memcpy.
// Self-assignment is a no-op. In DEBUG builds an unallocated rhs is reported
// through laerror.
template <typename T>
NRVec<T> & NRVec<T>::operator|=(const NRVec<T> &rhs)
{
    if (this == &rhs) return *this;
#ifdef DEBUG
    if (!rhs.v) laerror("unallocated rhs in NRVec operator |=");
#endif
    *this = rhs;
    this->copyonwrite();
    return *this;
}
template<typename T>
NRVec<complex<T> > complexify(const NRVec<T> &rhs)
{
@@ -696,6 +705,291 @@ for(int i=0; i<rhs.size(); ++i) r[i]=rhs[i];
return r;
}
#ifdef CUDALA
// Relocate the vector's data to dest (cpu or a GPU) and record the new location.
// Nothing is transferred when the vector is already at dest or holds no data.
// If the storage is shared (*count != 1) the old buffer is left to the other
// owners: their counter is decremented and this vector gets a new counter of 1.
// If this vector is the only owner, the old buffer is released.
template<typename T>
void NRVec<T>::moveto(const GPUID dest)
{
    if(dest==location) return;
    location=dest;

    if(!count)
        {
        if(v) laerror("internal inconsistency of reference counting 1");
        return;
        }
    if(!v) return;
    if(*count==0) laerror("internal inconsistency of reference counting 2");

    T *const previous = v;
    const bool togpu = (dest != cpu);
    if(togpu)
        {
        // moving from CPU to GPU
        v=(T *) gpualloc(nn*sizeof(T));
        gpuput(nn,sizeof(T),previous,v);
        }
    else
        {
        // moving from GPU to CPU
        v = new T[nn];
        gpuget(nn,sizeof(T),previous,v);
        }

    if(*count != 1)
        {
        --(*count);
        count = new int(1);
        return;
        }
    if(togpu) delete[] previous;
    else gpufree(previous);
}
#endif
//some template specializations leading to BLAS/CUBLAS calls
// x += a for double: adds the scalar a to every element.
// Done as an axpy with alpha = 1 and a source stride of 0, so the single
// value a is used for each of the nn elements. On the GPU path a is first
// uploaded with gpuputdouble and the temporary is freed afterwards.
template<>
inline
NRVec<double> & NRVec<double>::operator+=(const double &a)
{
    copyonwrite();
#ifdef CUDALA
    if(location!=cpu)
        {
        double *dev_a=gpuputdouble(a);
        cublasDaxpy(nn, 1.0, dev_a, 0, v, 1);
        gpufree(dev_a);
        return *this;
        }
#endif
    cblas_daxpy(nn, 1.0, &a, 0, v, 1);
    return *this;
}
// x += a for complex<double>: adds the scalar a to every element.
// Done as a zaxpy with alpha = 1 (CONE / CUONE) and a source stride of 0.
// On the GPU path a is first uploaded with gpuputcomplex and the temporary
// is freed afterwards.
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator+=(const complex<double> &a)
{
    copyonwrite();
#ifdef CUDALA
    if(location!=cpu)
        {
        complex<double> *dev_a=gpuputcomplex(a);
        cublasZaxpy(nn, CUONE, (cuDoubleComplex *)dev_a, 0, (cuDoubleComplex *)v, 1);
        gpufree(dev_a);
        return *this;
        }
#endif
    cblas_zaxpy(nn, &CONE, &a, 0, v, 1);
    return *this;
}
// x -= a for double: subtracts the scalar a from every element.
// Done as an axpy with alpha = -1 and a source stride of 0. On the GPU path
// a is first uploaded with gpuputdouble and the temporary is freed afterwards.
template<>
inline
NRVec<double> & NRVec<double>::operator-=(const double &a)
{
    copyonwrite();
#ifdef CUDALA
    if(location!=cpu)
        {
        double *dev_a=gpuputdouble(a);
        cublasDaxpy(nn, -1.0, dev_a, 0, v, 1);
        gpufree(dev_a);
        return *this;
        }
#endif
    cblas_daxpy(nn, -1.0, &a, 0, v, 1);
    return *this;
}
// x -= a for complex<double>: subtracts the scalar a from every element.
// Done as a zaxpy with alpha = -1 (CMONE / CUMONE) and a source stride of 0.
// On the GPU path a is first uploaded with gpuputcomplex and the temporary
// is freed afterwards.
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator-=(const complex<double> &a)
{
    copyonwrite();
#ifdef CUDALA
    if(location!=cpu)
        {
        complex<double> *dev_a=gpuputcomplex(a);
        cublasZaxpy(nn, CUMONE, (cuDoubleComplex *)dev_a, 0, (cuDoubleComplex *)v, 1);
        gpufree(dev_a);
        return *this;
        }
#endif
    cblas_zaxpy(nn, &CMONE, &a, 0, v, 1);
    return *this;
}
// x += y for double vectors (axpy with alpha = 1).
// Both operands are checked with SAME_LOC. The cpu path uses cblas_daxpy and
// the GPU path cublasDaxpy, matching the structure of operator-= for
// NRVec<double>, so that device pointers are never handed to host BLAS.
// In DEBUG builds a length mismatch is reported through laerror.
template<>
inline
NRVec<double> & NRVec<double>::operator+=(const NRVec<double> &rhs)
{
#ifdef DEBUG
    if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
    SAME_LOC(*this,rhs);
    copyonwrite();
#ifdef CUDALA
    if(location==cpu)
#endif
        cblas_daxpy(nn, 1.0, rhs.v, 1, v, 1);
#ifdef CUDALA
    else
        cublasDaxpy(nn, 1.0, rhs.v, 1, v, 1);
#endif
    return *this;
}
// x += y for complex<double> vectors (zaxpy with alpha = 1).
// Both operands are checked with SAME_LOC. The cpu path uses cblas_zaxpy with
// CONE; the GPU path uses cublasZaxpy with CUONE, with the same pointer casts
// as the scalar complex operator+=.
// In DEBUG builds a length mismatch is reported through laerror.
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator+=(const NRVec< complex<double> > &rhs)
{
#ifdef DEBUG
    if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
    SAME_LOC(*this,rhs);
    copyonwrite();
#ifdef CUDALA
    if(location==cpu)
#endif
        cblas_zaxpy(nn, &CONE, rhs.v, 1, v, 1);
#ifdef CUDALA
    else
        cublasZaxpy(nn, CUONE, (cuDoubleComplex *)rhs.v, 1, (cuDoubleComplex *)v, 1);
#endif
    return *this;
}
// x -= y for double vectors (axpy with alpha = -1).
// Operand locations are checked with SAME_LOC; the GPU path uses cublasDaxpy,
// the cpu path cblas_daxpy.
// In DEBUG builds a length mismatch is reported through laerror.
template<>
inline
NRVec<double> & NRVec<double>::operator-=(const NRVec<double> &rhs)
{
#ifdef DEBUG
    if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
    SAME_LOC(*this,rhs);
    copyonwrite();
#ifdef CUDALA
    if(location!=cpu)
        {
        cublasDaxpy(nn, -1.0, rhs.v, 1, v, 1);
        return *this;
        }
#endif
    cblas_daxpy(nn, -1.0, rhs.v, 1, v, 1);
    return *this;
}
// x -= y for complex<double> vectors (zaxpy with alpha = -1).
// Both operands are checked with SAME_LOC. The cpu path uses cblas_zaxpy with
// CMONE; the GPU path uses cublasZaxpy with CUMONE, with the same pointer casts
// as the scalar complex operator-=.
// In DEBUG builds a length mismatch is reported through laerror.
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator-=(const NRVec< complex<double> > &rhs)
{
#ifdef DEBUG
    if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
    SAME_LOC(*this,rhs);
    copyonwrite();
#ifdef CUDALA
    if(location==cpu)
#endif
        cblas_zaxpy(nn, &CMONE, rhs.v, 1, v, 1);
#ifdef CUDALA
    else
        cublasZaxpy(nn, CUMONE, (cuDoubleComplex *)rhs.v, 1, (cuDoubleComplex *)v, 1);
#endif
    return *this;
}
// x *= a for double: scales every element by a using cblas_dscal.
// Only the host BLAS routine is called, so the vector is checked with NOT_GPU
// first (as the generic operator*= does) instead of passing a possibly
// device-resident pointer to cblas.
template<>
inline
NRVec<double> & NRVec<double>::operator*=(const double &a)
{
    NOT_GPU(*this);
    copyonwrite();
    cblas_dscal(nn, a, v, 1);
    return *this;
}
// x *= a for complex<double>: scales every element by a using cblas_zscal.
// Only the host BLAS routine is called, so the vector is checked with NOT_GPU
// first (as the generic operator*= does) instead of passing a possibly
// device-resident pointer to cblas.
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator*=(const complex<double> &a)
{
    NOT_GPU(*this);
    copyonwrite();
    cblas_zscal(nn, &a, v, 1);
    return *this;
}
// Scalar product x.y for double vectors, computed by cblas_ddot.
// Only the host BLAS routine is called, so both operands are checked with
// NOT_GPU, the same checks the generic operator* applies.
// In DEBUG builds a length mismatch is reported through laerror.
template<>
inline
const double NRVec<double>::operator*(const NRVec<double> &rhs) const
{
#ifdef DEBUG
    if (nn != rhs.nn) laerror("dot of incompatible vectors");
#endif
    NOT_GPU(*this);
    NOT_GPU(rhs);
    return cblas_ddot(nn, v, 1, rhs.v, 1);
}
// Scalar product for complex<double> vectors, computed by cblas_zdotc_sub
// (the conjugating dot product; *this is passed as the first vector).
// Only the host BLAS routine is called, so both operands are checked with
// NOT_GPU, the same checks the generic operator* applies.
// In DEBUG builds a length mismatch is reported through laerror.
template<>
inline
const complex<double>
NRVec< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
{
#ifdef DEBUG
    if (nn != rhs.nn) laerror("dot of incompatible vectors");
#endif
    NOT_GPU(*this);
    NOT_GPU(rhs);
    complex<double> dot;
    cblas_zdotc_sub(nn, v, 1, rhs.v, 1, &dot);
    return dot;
}
// Sum of the absolute values of the elements (BLAS dasum).
// Only the host BLAS routine is called, so the vector is checked with NOT_GPU first.
template<>
inline
const double NRVec<double>::asum() const
{
    NOT_GPU(*this);
    return cblas_dasum(nn, v, 1);
}
// Dot product of this vector with the strided array y:
// sum over i of y[i*stride] * v[i], computed by cblas_ddot.
// y is handed straight to host BLAS, and the vector itself is checked with
// NOT_GPU so that a device-resident v is not passed to cblas.
template<>
inline
const double NRVec<double>::dot(const double *y, const int stride) const
{
    NOT_GPU(*this);
    return cblas_ddot(nn, y, stride, v, 1);
}
// Dot product of the strided complex array y with this vector, computed by
// cblas_zdotc_sub (the conjugating dot product; y is passed as the first vector).
// y is handed straight to host BLAS, and the vector itself is checked with
// NOT_GPU so that a device-resident v is not passed to cblas.
template<>
inline
const complex<double>
NRVec< complex<double> >::dot(const complex<double> *y, const int stride) const
{
    NOT_GPU(*this);
    complex<double> dot;
    cblas_zdotc_sub(nn, y, stride, v, 1, &dot);
    return dot;
}
// Euclidean norm of a double vector: cublasDnrm2 when the data is not on the
// cpu (CUDALA builds only), cblas_dnrm2 otherwise.
template<>
inline
const double NRVec<double>::norm() const
{
    double result;
#ifdef CUDALA
    if(location!=cpu)
        result = cublasDnrm2(nn, v, 1);
    else
#endif
        result = cblas_dnrm2(nn, v, 1);
    return result;
}
// Euclidean norm of a complex<double> vector.
// Mirrors the double specialisation: cublasDznrm2 when the data is not on the
// cpu (CUDALA builds only), cblas_dznrm2 otherwise, so that a device pointer
// is never passed to host BLAS.
template<>
inline
const double NRVec< complex<double> >::norm() const
{
#ifdef CUDALA
    if(location!=cpu) return cublasDznrm2(nn, (cuDoubleComplex *)v, 1);
#endif
    return cblas_dznrm2(nn, v, 1);
}
// Element of largest absolute value: returns v[k] (with its sign), where k is
// the index reported by cblas_idamax.
// The result is read by indexing v on the host, so the vector is checked with
// NOT_GPU first.
template<>
inline
const double NRVec<double>::amax() const
{
    NOT_GPU(*this);
    return v[cblas_idamax(nn, v, 1)];
}
/*
cblas_izamax seems to be missing at least in some cblas versions
template<>
inline
const complex<double> NRVec< complex<double> >::amax() const
{
return v[cblas_izamax(nn, v, 1)];
}
*/
}//namespace
#endif /* _LA_VEC_H_ */