2008-02-26 14:55:23 +01:00
|
|
|
/*
|
|
|
|
LA: linear algebra C++ interface library
|
|
|
|
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
|
|
|
complex versions written by Roman Curik <roman.curik@jh-inst.cas.cz>
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
2004-03-17 04:07:21 +01:00
|
|
|
#ifndef _LA_VEC_H_
|
|
|
|
#define _LA_VEC_H_
|
|
|
|
|
2005-02-14 01:10:07 +01:00
|
|
|
#include "la_traits.h"
|
|
|
|
|
2009-11-12 22:01:19 +01:00
|
|
|
namespace LA {
|
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Forward declarations
|
|
|
|
template <typename T> void lawritemat(FILE *file,const T *a,int r,int c,
|
|
|
|
const char *form0,int nodim,int modulo, int issym);
|
|
|
|
|
|
|
|
// Memory allocated constants for cblas routines
|
|
|
|
// Kept as named objects because the cblas complex routines below receive the scalar by address (e.g. &CONE, &CMONE).
const static complex<double> CONE = 1.0, CMONE = -1.0, CZERO = 0.0;
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
// cuDoubleComplex counterparts of CONE/CMONE/CZERO; passed by value to cublasZaxpy below.
const static cuDoubleComplex CUONE = {1.,0.}, CUMONE = {-1.,0.}, CUZERO = {0.,0.};
|
|
|
|
#endif
|
2004-03-17 04:07:21 +01:00
|
|
|
|
|
|
|
// Macros to construct binary operators +,-,*, from +=, -=, *=
|
|
|
|
// for 3 cases: X + a, a + X, X + Y
|
|
|
|
// NRVECMAT_OPER(E,X): for class NR<E><T> generate
//   - the member  `obj X a`  (a is a scalar T), and
//   - the free function  `a X obj`,
// both implemented by copying the object and applying the compound assignment `X=`.
// NOTE(review): the free function evaluates `obj X= a`, i.e. it yields `obj X a`.
// For a non-commutative X such as '-' this is not the mathematical `a X obj` -- confirm intended.
#define NRVECMAT_OPER(E,X) \
template<class T> \
inline const NR##E<T> NR##E<T>::operator X(const T &a) const \
{ NR##E<T> result(*this); result X##= a; return result; } \
\
template<class T> \
inline const NR##E<T> operator X(const T &a, const NR##E<T> &rhs) \
{ NR##E<T> result(rhs); result X##= a; return result; }
|
|
|
|
|
|
|
|
// NRVECMAT_OPER2(E,X): for class NR<E><T> generate the member `obj X other`
// (both operands NR<E><T>) by copying the left operand and applying `X=`.
#define NRVECMAT_OPER2(E,X) \
template<class T> \
inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
{ NR##E<T> result(*this); result X##= a; return result; }
|
|
|
|
|
2004-03-17 17:39:07 +01:00
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
// NRVec class
|
|
|
|
template <typename T>
|
|
|
|
class NRVec {
|
|
|
|
protected:
|
|
|
|
int nn;
|
|
|
|
T *v;
|
|
|
|
int *count;
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
GPUID location;
|
|
|
|
#endif
|
2004-03-17 04:07:21 +01:00
|
|
|
public:
|
|
|
|
friend class NRSMat<T>;
|
|
|
|
friend class NRMat<T>;
|
|
|
|
|
2010-06-25 17:28:19 +02:00
|
|
|
inline NRVec(): nn(0),v(0),count(0)
|
|
|
|
{
|
|
|
|
#ifdef CUDALA
|
|
|
|
location = DEFAULT_LOC;
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
explicit inline NRVec(const int n, const GPUID loc= undefined) : nn(n), count(new int(1))
|
|
|
|
{
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(loc==undefined) location = DEFAULT_LOC; else location = loc;
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
v= new T[n];
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
v= (T*) gpualloc(n*sizeof(T));
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
inline NRVec(const T &a, const int n);
|
2006-09-12 01:07:22 +02:00
|
|
|
inline NRVec(const T *a, const int n);
|
|
|
|
inline NRVec(T *a, const int n, bool skeleton);
|
2004-03-17 04:07:21 +01:00
|
|
|
inline NRVec(const NRVec &rhs);
|
2009-09-04 10:09:32 +02:00
|
|
|
NRVec(const typename LA_traits_complex<T>::NRVec_Noncomplex_type &rhs, bool imagpart=false); //construct complex from real
|
2004-03-17 04:07:21 +01:00
|
|
|
inline explicit NRVec(const NRSMat<T> & S);
|
2005-11-20 14:46:00 +01:00
|
|
|
#ifdef MATPTR
|
|
|
|
explicit NRVec(const NRMat<T> &rhs) : NRVec(&rhs[0][0],rhs.nrows()*rhs.ncols()) {};
|
|
|
|
#else
|
2004-03-17 04:07:21 +01:00
|
|
|
explicit NRVec(const NRMat<T> &rhs);
|
2010-06-25 17:28:19 +02:00
|
|
|
#endif
|
|
|
|
#ifdef CUDALA
|
|
|
|
inline GPUID getlocation() const {return location;}
|
|
|
|
void moveto(const GPUID dest);
|
|
|
|
#else
|
|
|
|
inline GPUID getlocation() const {return cpu;}
|
|
|
|
void moveto(const GPUID dest) {};
|
2004-03-17 04:07:21 +01:00
|
|
|
#endif
|
|
|
|
NRVec & operator=(const NRVec &rhs);
|
|
|
|
NRVec & operator=(const T &a); //assign a to every element
|
2009-09-04 10:09:32 +02:00
|
|
|
void randomize(const typename LA_traits<T>::normtype &x);
|
2004-03-17 04:07:21 +01:00
|
|
|
NRVec & operator|=(const NRVec &rhs);
|
2005-09-06 17:55:07 +02:00
|
|
|
const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,nn);} //memcmp for scalars else elementwise
|
2005-02-14 01:10:07 +01:00
|
|
|
const bool operator==(const NRVec &rhs) const {return !(*this != rhs);};
|
2005-09-06 17:55:07 +02:00
|
|
|
const bool operator>(const NRVec &rhs) const;
|
|
|
|
const bool operator<(const NRVec &rhs) const;
|
|
|
|
const bool operator>=(const NRVec &rhs) const {return !(*this < rhs);};
|
|
|
|
const bool operator<=(const NRVec &rhs) const {return !(*this > rhs);};
|
2004-03-17 04:07:21 +01:00
|
|
|
const NRVec operator-() const;
|
|
|
|
inline NRVec & operator+=(const NRVec &rhs);
|
|
|
|
inline NRVec & operator-=(const NRVec &rhs);
|
2006-04-06 23:45:51 +02:00
|
|
|
inline NRVec & operator*=(const NRVec &rhs); //elementwise
|
|
|
|
inline NRVec & operator/=(const NRVec &rhs); //elementwise
|
2004-03-17 04:07:21 +01:00
|
|
|
inline NRVec & operator+=(const T &a);
|
|
|
|
inline NRVec & operator-=(const T &a);
|
|
|
|
inline NRVec & operator*=(const T &a);
|
|
|
|
inline int getcount() const {return count?*count:0;}
|
|
|
|
inline const NRVec operator+(const NRVec &rhs) const;
|
|
|
|
inline const NRVec operator-(const NRVec &rhs) const;
|
|
|
|
inline const NRVec operator+(const T &a) const;
|
|
|
|
inline const NRVec operator-(const T &a) const;
|
|
|
|
inline const NRVec operator*(const T &a) const;
|
2005-02-18 23:08:15 +01:00
|
|
|
inline const T operator*(const NRVec &rhs) const; //scalar product -> dot
|
|
|
|
inline const T dot(const NRVec &rhs) const {return *this * rhs;}; //@@@for complex do conjugate
|
|
|
|
void gemv(const T beta, const NRMat<T> &a, const char trans, const T alpha, const NRVec &x);
|
|
|
|
void gemv(const T beta, const NRSMat<T> &a, const char trans /*just for compatibility*/, const T alpha, const NRVec &x);
|
2006-04-06 23:45:51 +02:00
|
|
|
void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x,const bool treat_as_symmetric=false);
|
2009-09-04 10:09:32 +02:00
|
|
|
void gemv(const typename LA_traits_complex<T>::Component_type beta, const typename LA_traits_complex<T>::NRMat_Noncomplex_type &a, const char trans, const typename LA_traits_complex<T>::Component_type alpha, const NRVec &x);
|
|
|
|
void gemv(const typename LA_traits_complex<T>::Component_type beta, const typename LA_traits_complex<T>::NRSMat_Noncomplex_type &a, const char trans, const typename LA_traits_complex<T>::Component_type alpha, const NRVec &x);
|
2010-06-25 17:28:19 +02:00
|
|
|
const NRVec operator*(const NRMat<T> &mat) const {NRVec<T> result(mat.ncols(),mat.getlocation()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
|
|
|
|
const NRVec operator*(const NRSMat<T> &mat) const {NRVec<T> result(mat.ncols(),mat.getlocation()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
|
2005-02-18 23:08:15 +01:00
|
|
|
const NRVec operator*(const SparseMat<T> &mat) const {NRVec<T> result(mat.ncols()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
|
2009-11-12 22:01:19 +01:00
|
|
|
const NRMat<T> otimes(const NRVec<T> &rhs, const bool conjugate=false, const T &scale=1) const; //outer product
|
|
|
|
inline const NRMat<T> operator|(const NRVec<T> &rhs) const {return otimes(rhs,true);};
|
2008-04-16 14:56:02 +02:00
|
|
|
inline const T sum() const {T sum=0; for(int i=0; i<nn; i++) sum += v[i]; return sum;}; //sum of its elements
|
|
|
|
inline const T asum() const; //sum of its elements absolute values
|
2004-03-17 04:07:21 +01:00
|
|
|
inline const T dot(const T *a, const int stride=1) const; // ddot with a stride-vector
|
|
|
|
inline T & operator[](const int i);
|
|
|
|
inline const T & operator[](const int i) const;
|
2010-01-11 11:12:28 +01:00
|
|
|
typedef T ROWTYPE;
|
|
|
|
inline void setcoldim(int i) {}; //dummy
|
2004-03-17 04:07:21 +01:00
|
|
|
inline int size() const;
|
|
|
|
inline operator T*(); //get a pointer to the data
|
|
|
|
inline operator const T*() const; //get a pointer to the data
|
|
|
|
~NRVec();
|
|
|
|
void axpy(const T alpha, const NRVec &x); // this+= a*x
|
|
|
|
void axpy(const T alpha, const T *x, const int stride=1); // this+= a*x
|
|
|
|
void copyonwrite();
|
2009-09-04 10:09:32 +02:00
|
|
|
void clear() {copyonwrite(); LA_traits<T>::clear(v,nn);}; //zero out
|
2004-03-17 04:07:21 +01:00
|
|
|
void resize(const int n);
|
2005-09-11 22:04:24 +02:00
|
|
|
void get(int fd, bool dimensions=1, bool transp=0);
|
|
|
|
void put(int fd, bool dimensions=1, bool transp=0) const;
|
2004-03-17 04:07:21 +01:00
|
|
|
NRVec & normalize();
|
2009-09-04 10:09:32 +02:00
|
|
|
inline const typename LA_traits<T>::normtype norm() const;
|
2004-03-17 04:07:21 +01:00
|
|
|
inline const T amax() const;
|
|
|
|
inline const NRVec unitvector() const;
|
|
|
|
void fprintf(FILE *f, const char *format, const int modulo) const;
|
|
|
|
void fscanf(FILE *f, const char *format);
|
|
|
|
//sparse matrix concerning members
|
|
|
|
explicit NRVec(const SparseMat<T> &rhs); // dense from sparse matrix with one of dimensions =1
|
|
|
|
inline void simplify() {}; //just for compatibility with sparse ones
|
2006-04-01 14:58:57 +02:00
|
|
|
bool bigger(int i, int j) const {return LA_traits<T>::bigger(v[i],v[j]);};
|
|
|
|
bool smaller(int i, int j) const {return LA_traits<T>::smaller(v[i],v[j]);};
|
|
|
|
void swap(int i, int j) {T tmp; tmp=v[i]; v[i]=v[j]; v[j]=tmp;};
|
2006-04-01 16:56:35 +02:00
|
|
|
int sort(int direction=0, int from=0, int to= -1, int *perm=NULL); //sort, ascending by default, returns parity of permutation
|
2010-02-25 21:47:01 +01:00
|
|
|
NRVec & CallOnMe(T (*_F)(const T &) ) {copyonwrite(); for(int i=0; i<nn; ++i) v[i] = _F(v[i]); return *this;};
|
2004-03-17 04:07:21 +01:00
|
|
|
};
|
|
|
|
|
2009-11-12 22:01:19 +01:00
|
|
|
}//namespace
|
2006-04-01 14:58:57 +02:00
|
|
|
|
2005-02-18 23:08:15 +01:00
|
|
|
//due to mutual includes this has to be after full class declaration
|
|
|
|
#include "mat.h"
|
|
|
|
#include "smat.h"
|
|
|
|
#include "sparsemat.h"
|
2010-01-11 11:12:28 +01:00
|
|
|
#include "sparsesmat.h"
|
2005-02-18 23:08:15 +01:00
|
|
|
|
2010-06-25 17:28:19 +02:00
|
|
|
|
|
|
|
|
2009-11-12 22:01:19 +01:00
|
|
|
namespace LA {
|
2006-10-21 17:32:53 +02:00
|
|
|
// formatted I/O
|
|
|
|
template <typename T>
|
2009-11-12 22:01:19 +01:00
|
|
|
std::ostream & operator<<(std::ostream &s, const NRVec<T> &x)
|
2006-10-21 17:32:53 +02:00
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
if(x.getlocation()==cpu)
|
|
|
|
{
|
|
|
|
#endif
|
2006-10-21 17:32:53 +02:00
|
|
|
int i, n;
|
|
|
|
n = x.size();
|
2009-11-12 22:01:19 +01:00
|
|
|
s << n << std::endl;
|
2006-10-21 17:32:53 +02:00
|
|
|
for(i=0; i<n; i++) s << (typename LA_traits_io<T>::IOtype)x[i] << (i == n-1 ? '\n' : ' ');
|
|
|
|
return s;
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
NRVec<T> tmp=x;
|
|
|
|
tmp.moveto(cpu);
|
|
|
|
return s<<tmp;
|
|
|
|
}
|
|
|
|
#endif
|
2006-10-21 17:32:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
2009-11-12 22:01:19 +01:00
|
|
|
std::istream & operator>>(std::istream &s, NRVec<T> &x)
|
2006-10-21 17:32:53 +02:00
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
if(x.getlocation()==cpu)
|
|
|
|
{
|
|
|
|
#endif
|
2006-10-21 17:32:53 +02:00
|
|
|
int i,n;
|
|
|
|
s >> n;
|
|
|
|
x.resize(n);
|
|
|
|
typename LA_traits_io<T>::IOtype tmp;
|
|
|
|
for(i=0; i<n; i++) {s >> tmp; x[i]=tmp;}
|
|
|
|
return s;
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
NRVec<T> tmp;
|
|
|
|
tmp.moveto(cpu);
|
|
|
|
s >> tmp;
|
|
|
|
tmp.moveto(x.getlocation());
|
|
|
|
x=tmp;
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
#endif
|
2006-10-21 17:32:53 +02:00
|
|
|
}
|
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
|
|
|
|
// INLINES
|
|
|
|
|
|
|
|
// ctors
|
|
|
|
template <typename T>
|
2010-06-25 17:28:19 +02:00
|
|
|
inline NRVec<T>::NRVec(const T& a, const int n) : nn(n), count(new int)
|
2004-03-17 04:07:21 +01:00
|
|
|
{
|
|
|
|
*count = 1;
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
location=DEFAULT_LOC;
|
|
|
|
if(location==cpu)
|
|
|
|
{
|
|
|
|
#endif
|
|
|
|
v = new T[n];
|
2004-03-17 04:07:21 +01:00
|
|
|
if(a != (T)0)
|
|
|
|
for(int i=0; i<n; i++)
|
|
|
|
v[i] = a;
|
|
|
|
else
|
|
|
|
memset(v, 0, nn*sizeof(T));
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
v= (T*) gpualloc(n*sizeof(T));
|
|
|
|
cublasSetVector(n,sizeof(T),&a,0,v,1);
|
|
|
|
}
|
|
|
|
#endif
|
2004-03-17 04:07:21 +01:00
|
|
|
}
|
|
|
|
|
2010-06-25 17:28:19 +02:00
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
template <typename T>
|
2006-09-12 01:07:22 +02:00
|
|
|
inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), count(new int)
|
2004-03-17 04:07:21 +01:00
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
location=DEFAULT_LOC;
|
|
|
|
if(location==cpu)
|
|
|
|
{
|
|
|
|
#endif
|
|
|
|
v=new T[n];
|
|
|
|
*count = 1;
|
|
|
|
memcpy(v, a, n*sizeof(T));
|
|
|
|
#ifdef CUDALA
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
v= (T*) gpualloc(n*sizeof(T));
|
|
|
|
cublasSetVector(n,sizeof(T),a,1,v,1);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2006-09-12 01:07:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int)
|
|
|
|
{
|
|
|
|
if(!skeleton)
|
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
location=DEFAULT_LOC;
|
|
|
|
if(location==cpu)
|
|
|
|
{
|
|
|
|
#endif
|
2006-09-12 01:07:22 +02:00
|
|
|
v=new T[n];
|
|
|
|
*count = 1;
|
|
|
|
memcpy(v, a, n*sizeof(T));
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
v= (T*) gpualloc(n*sizeof(T));
|
|
|
|
cublasSetVector(n,sizeof(T),a,1,v,1);
|
|
|
|
}
|
|
|
|
#endif
|
2006-09-12 01:07:22 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
if(location!=cpu) laerror("NRVec() with skeleton option cannot be on GPU");
|
|
|
|
#endif
|
2006-09-12 01:07:22 +02:00
|
|
|
*count = 2;
|
|
|
|
v=a;
|
|
|
|
}
|
2004-03-17 04:07:21 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T>::NRVec(const NRVec<T> &rhs)
|
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
location=rhs.location;
|
|
|
|
#endif
|
2004-03-17 04:07:21 +01:00
|
|
|
v = rhs.v;
|
|
|
|
nn = rhs.nn;
|
|
|
|
count = rhs.count;
|
|
|
|
if(count) (*count)++;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T>::NRVec(const NRSMat<T> &rhs)
|
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
location=rhs.location;
|
|
|
|
#endif
|
2004-03-17 04:07:21 +01:00
|
|
|
nn = rhs.nn;
|
|
|
|
nn = NN2;
|
|
|
|
v = rhs.v;
|
|
|
|
count = rhs.count;
|
|
|
|
(*count)++;
|
|
|
|
}
|
|
|
|
|
2010-06-25 17:28:19 +02:00
|
|
|
// x +/-= a
|
2004-03-17 06:34:59 +01:00
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T> & NRVec<T>::operator+=(const T &a)
|
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
NOT_GPU(*this);
|
2004-03-17 06:34:59 +01:00
|
|
|
copyonwrite();
|
|
|
|
int i;
|
|
|
|
for(i=0; i<nn; ++i) v[i]+=a;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T> & NRVec<T>::operator-=(const T &a)
|
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
NOT_GPU(*this);
|
2004-03-17 06:34:59 +01:00
|
|
|
copyonwrite();
|
2010-06-25 17:28:19 +02:00
|
|
|
int i;
|
|
|
|
for(i=0; i<nn; ++i) v[i]-=a;
|
2004-03-17 06:34:59 +01:00
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
|
2010-06-25 17:28:19 +02:00
|
|
|
// x += y (elementwise vector addition)
|
2004-03-17 06:34:59 +01:00
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T> & NRVec<T>::operator+=(const NRVec<T> &rhs)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
|
2010-06-25 17:28:19 +02:00
|
|
|
NOT_GPU(*this);
|
|
|
|
NOT_GPU(rhs);
|
2004-03-17 06:34:59 +01:00
|
|
|
#endif
|
|
|
|
copyonwrite();
|
|
|
|
int i;
|
|
|
|
for(i=0; i<nn; ++i) v[i]+=rhs.v[i];
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2006-04-06 23:45:51 +02:00
|
|
|
//for general type only
|
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T> & NRVec<T>::operator*=(const NRVec<T> &rhs)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("*= of incompatible vectors");
|
|
|
|
#endif
|
|
|
|
copyonwrite();
|
|
|
|
int i;
|
|
|
|
for(i=0; i<nn; ++i) v[i]*=rhs.v[i];
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T> & NRVec<T>::operator/=(const NRVec<T> &rhs)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("/= of incompatible vectors");
|
2010-06-25 17:28:19 +02:00
|
|
|
NOT_GPU(*this);
|
|
|
|
NOT_GPU(rhs);
|
2006-04-06 23:45:51 +02:00
|
|
|
#endif
|
|
|
|
copyonwrite();
|
|
|
|
int i;
|
|
|
|
for(i=0; i<nn; ++i) v[i]/=rhs.v[i];
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-03-17 06:34:59 +01:00
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
// x -= y (elementwise vector subtraction)
|
2004-03-17 06:34:59 +01:00
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T> & NRVec<T>::operator-=(const NRVec<T> &rhs)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
|
2010-06-25 17:28:19 +02:00
|
|
|
NOT_GPU(*this);
|
|
|
|
NOT_GPU(rhs);
|
2004-03-17 06:34:59 +01:00
|
|
|
#endif
|
|
|
|
copyonwrite();
|
|
|
|
int i;
|
|
|
|
for(i=0; i<nn; ++i) v[i]-=rhs.v[i];
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
// x *= a
|
2004-03-17 06:34:59 +01:00
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T> & NRVec<T>::operator*=(const T &a)
|
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
NOT_GPU(*this);
|
2004-03-17 06:34:59 +01:00
|
|
|
copyonwrite();
|
|
|
|
int i;
|
|
|
|
for(i=0; i<nn; ++i) v[i]*=a;
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
// scalar product x.y
|
2005-02-18 23:08:15 +01:00
|
|
|
template<typename T>
|
|
|
|
inline const T NRVec<T>::operator*(const NRVec<T> &rhs) const
|
2004-03-17 04:07:21 +01:00
|
|
|
{
|
2005-02-18 23:08:15 +01:00
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("dot of incompatible vectors");
|
2010-06-25 17:28:19 +02:00
|
|
|
NOT_GPU(*this);
|
|
|
|
NOT_GPU(rhs);
|
2005-02-18 23:08:15 +01:00
|
|
|
#endif
|
|
|
|
T dot = 0;
|
|
|
|
for(int i=0; i<nn; ++i) dot+= v[i]*rhs.v[i];
|
|
|
|
return dot;
|
2004-03-17 04:07:21 +01:00
|
|
|
}
|
|
|
|
|
2005-02-18 23:08:15 +01:00
|
|
|
|
|
|
|
|
2004-03-17 04:07:21 +01:00
|
|
|
|
|
|
|
// x[i] returns i-th element
|
|
|
|
template <typename T>
|
|
|
|
inline T & NRVec<T>::operator[](const int i)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
2009-09-04 10:09:32 +02:00
|
|
|
if(_LA_count_check && *count != 1) laerror("possible lval [] with count > 1");
|
2004-03-17 04:07:21 +01:00
|
|
|
if(i < 0 || i >= nn) laerror("NRVec out of range");
|
|
|
|
if(!v) laerror("[] on unallocated NRVec");
|
2010-06-25 17:28:19 +02:00
|
|
|
NOT_GPU(*this);
|
2004-03-17 04:07:21 +01:00
|
|
|
#endif
|
|
|
|
return v[i];
|
|
|
|
}
|
|
|
|
template <typename T>
|
|
|
|
inline const T & NRVec<T>::operator[](const int i) const
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if(i < 0 || i >= nn) laerror("NRVec out of range");
|
|
|
|
if(!v) laerror("[] on unallocated NRVec");
|
2010-06-25 17:28:19 +02:00
|
|
|
NOT_GPU(*this);
|
2004-03-17 04:07:21 +01:00
|
|
|
#endif
|
|
|
|
return v[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
// length of the vector
|
|
|
|
template <typename T>
|
|
|
|
inline int NRVec<T>::size() const
|
|
|
|
{
|
|
|
|
return nn;
|
|
|
|
}
|
|
|
|
|
|
|
|
// reference Vec to the first element
|
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T>::operator T*()
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if(!v) laerror("unallocated NRVec in operator T*");
|
|
|
|
#endif
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
template <typename T>
|
|
|
|
inline NRVec<T>::operator const T*() const
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if(!v) laerror("unallocated NRVec in operator T*");
|
|
|
|
#endif
|
|
|
|
return v;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Make Vec unitvector
|
|
|
|
template <typename T>
|
|
|
|
inline const NRVec<T> NRVec<T>::unitvector() const
|
|
|
|
{
|
|
|
|
return NRVec<T>(*this).normalize();
|
|
|
|
}
|
|
|
|
|
|
|
|
// generate operators: Vec + a, a + Vec, Vec - a, a - Vec, Vec * a, a * Vec
|
|
|
|
NRVECMAT_OPER(Vec,+)
|
|
|
|
NRVECMAT_OPER(Vec,-)
|
|
|
|
NRVECMAT_OPER(Vec,*)
|
|
|
|
// generate operators: Vec + Vec, Vec - Vec
|
|
|
|
NRVECMAT_OPER2(Vec,+)
|
|
|
|
NRVECMAT_OPER2(Vec,-)
|
|
|
|
|
|
|
|
// Few forward declarations
|
|
|
|
|
2004-03-17 17:39:07 +01:00
|
|
|
//basic stuff which has to be in .h
|
|
|
|
// dtor
|
|
|
|
template <typename T>
|
|
|
|
NRVec<T>::~NRVec()
|
|
|
|
{
|
|
|
|
if(!count) return;
|
|
|
|
if(--(*count) <= 0) {
|
2010-06-25 17:28:19 +02:00
|
|
|
if(v)
|
|
|
|
{
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
delete[] (v);
|
|
|
|
#ifdef CUDALA
|
|
|
|
else gpufree(v);
|
|
|
|
#endif
|
|
|
|
}
|
2004-03-17 17:39:07 +01:00
|
|
|
delete count;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// detach from a physical vector and make own copy
|
|
|
|
template <typename T>
|
|
|
|
void NRVec<T>::copyonwrite()
|
|
|
|
{
|
2008-03-05 14:49:51 +01:00
|
|
|
if(!count) laerror("Vec::copyonwrite() of an undefined vector");
|
2004-03-17 17:39:07 +01:00
|
|
|
if(*count > 1)
|
|
|
|
{
|
|
|
|
(*count)--;
|
|
|
|
count = new int;
|
|
|
|
*count = 1;
|
2010-06-25 17:28:19 +02:00
|
|
|
T *newv;
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
{
|
|
|
|
#endif
|
|
|
|
newv = new T[nn];
|
|
|
|
memcpy(newv, v, nn*sizeof(T));
|
|
|
|
#ifdef CUDALA
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
newv = (T *) gpualloc(nn*sizeof(T));
|
|
|
|
if(sizeof(T)%sizeof(float)!=0) laerror("cpu memcpy alignment problem");
|
|
|
|
cublasScopy(nn*sizeof(T)/sizeof(float),(const float *) v,1,(float *)newv,1);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
2004-03-17 17:39:07 +01:00
|
|
|
v = newv;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-06-25 17:28:19 +02:00
|
|
|
|
2004-03-17 17:39:07 +01:00
|
|
|
// Assignment
|
|
|
|
template <typename T>
|
|
|
|
NRVec<T> & NRVec<T>::operator=(const NRVec<T> &rhs)
|
|
|
|
{
|
|
|
|
if (this != &rhs)
|
|
|
|
{
|
|
|
|
if(count)
|
|
|
|
if(--(*count) == 0)
|
|
|
|
{
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
delete[] v;
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
gpufree(v);
|
|
|
|
#endif
|
2004-03-17 17:39:07 +01:00
|
|
|
delete count;
|
|
|
|
}
|
|
|
|
v = rhs.v;
|
|
|
|
nn = rhs.nn;
|
|
|
|
count = rhs.count;
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
location=rhs.location;
|
|
|
|
#endif
|
2004-03-17 17:39:07 +01:00
|
|
|
if(count) (*count)++;
|
|
|
|
}
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2010-06-25 17:28:19 +02:00
|
|
|
|
|
|
|
|
2004-03-17 17:39:07 +01:00
|
|
|
// Resize
|
|
|
|
template <typename T>
|
|
|
|
void NRVec<T>::resize(const int n)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
2005-02-14 01:10:07 +01:00
|
|
|
if(n<0) laerror("illegal vector dimension");
|
2004-03-17 17:39:07 +01:00
|
|
|
#endif
|
|
|
|
if(count)
|
2005-02-14 01:10:07 +01:00
|
|
|
{
|
|
|
|
if(n==0)
|
|
|
|
{
|
|
|
|
if(--(*count) <= 0) {
|
2010-06-25 17:28:19 +02:00
|
|
|
if(v)
|
|
|
|
{
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
delete[] (v);
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
gpufree(v);
|
|
|
|
#endif
|
|
|
|
}
|
2005-02-14 01:10:07 +01:00
|
|
|
delete count;
|
|
|
|
}
|
|
|
|
count=0;
|
|
|
|
nn=0;
|
|
|
|
v=0;
|
|
|
|
return;
|
|
|
|
}
|
2004-03-17 17:39:07 +01:00
|
|
|
if(*count > 1) {
|
|
|
|
(*count)--;
|
|
|
|
count = 0;
|
|
|
|
v = 0;
|
|
|
|
nn = 0;
|
|
|
|
}
|
2005-02-14 01:10:07 +01:00
|
|
|
}
|
2004-03-17 17:39:07 +01:00
|
|
|
if(!count) {
|
|
|
|
count = new int;
|
|
|
|
*count = 1;
|
|
|
|
nn = n;
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
v = new T[nn];
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
v = (T*) gpualloc(nn*sizeof(T));
|
|
|
|
#endif
|
2004-03-17 17:39:07 +01:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
// *count = 1 in this branch
|
|
|
|
if (n != nn) {
|
|
|
|
nn = n;
|
2010-06-25 17:28:19 +02:00
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
{
|
|
|
|
delete[] v;
|
|
|
|
v = new T[nn];
|
|
|
|
}
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
{
|
|
|
|
gpufree(v);
|
|
|
|
v = (T*) gpualloc(nn*sizeof(T));
|
|
|
|
}
|
|
|
|
#endif
|
2004-03-17 17:39:07 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-04-06 23:45:51 +02:00
|
|
|
// assignment with a physical (deep) copy
|
2004-03-17 17:39:07 +01:00
|
|
|
template <typename T>
|
|
|
|
NRVec<T> & NRVec<T>::operator|=(const NRVec<T> &rhs)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (!rhs.v) laerror("unallocated rhs in NRVec operator |=");
|
|
|
|
#endif
|
2010-06-25 17:28:19 +02:00
|
|
|
if (this == &rhs) return *this;
|
|
|
|
*this = rhs;
|
|
|
|
this->copyonwrite();
|
|
|
|
return *this;
|
2004-03-17 17:39:07 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2010-06-25 17:28:19 +02:00
|
|
|
|
|
|
|
|
2006-04-01 06:48:01 +02:00
|
|
|
template<typename T>
|
|
|
|
NRVec<complex<T> > complexify(const NRVec<T> &rhs)
|
|
|
|
{
|
|
|
|
NRVec<complex<T> > r(rhs.size());
|
|
|
|
for(int i=0; i<rhs.size(); ++i) r[i]=rhs[i];
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2010-06-25 17:28:19 +02:00
|
|
|
|
|
|
|
#ifdef CUDALA
|
|
|
|
template<typename T>
|
|
|
|
void NRVec<T>::moveto(const GPUID dest)
|
|
|
|
{
|
|
|
|
if(location==dest) return;
|
|
|
|
location=dest;
|
|
|
|
|
|
|
|
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
|
|
|
if (!count) return;
|
|
|
|
|
|
|
|
if(v && *count==0) laerror("internal inconsistency of reference counting 2");
|
|
|
|
if(!v) return;
|
|
|
|
|
|
|
|
T *vold = v;
|
|
|
|
|
|
|
|
if(dest == cpu) //moving from GPU to CPU
|
|
|
|
{
|
|
|
|
v = new T[nn];
|
|
|
|
gpuget(nn,sizeof(T),vold,v);
|
|
|
|
if(*count == 1) gpufree(vold);
|
|
|
|
else {--(*count); count = new int(1);}
|
|
|
|
}
|
|
|
|
else //moving from CPU to GPU
|
|
|
|
{
|
|
|
|
v=(T *) gpualloc(nn*sizeof(T));
|
|
|
|
gpuput(nn,sizeof(T),vold,v);
|
|
|
|
if(*count == 1) delete[] vold;
|
|
|
|
else {--(*count); count = new int(1);}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
//some template specializations leading to BLAS/CUBLAS calls
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec<double> & NRVec<double>::operator+=(const double &a)
|
|
|
|
{
|
|
|
|
copyonwrite();
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
cblas_daxpy(nn, 1.0, &a, 0, v, 1);
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
{
|
|
|
|
double *d=gpuputdouble(a);
|
|
|
|
cublasDaxpy(nn, 1.0, d, 0, v, 1);
|
|
|
|
gpufree(d);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec< complex<double> > &
|
|
|
|
NRVec< complex<double> >::operator+=(const complex<double> &a)
|
|
|
|
{
|
|
|
|
copyonwrite();
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
cblas_zaxpy(nn, &CONE, &a, 0, v, 1);
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
{
|
|
|
|
complex<double> *d=gpuputcomplex(a);
|
|
|
|
cublasZaxpy(nn, CUONE, (cuDoubleComplex *)d, 0, (cuDoubleComplex *)v, 1);
|
|
|
|
gpufree(d);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec<double> & NRVec<double>::operator-=(const double &a)
|
|
|
|
{
|
|
|
|
copyonwrite();
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
cblas_daxpy(nn, -1.0, &a, 0, v, 1);
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
{
|
|
|
|
double *d=gpuputdouble(a);
|
|
|
|
cublasDaxpy(nn, -1.0, d, 0, v, 1);
|
|
|
|
gpufree(d);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec< complex<double> > &
|
|
|
|
NRVec< complex<double> >::operator-=(const complex<double> &a)
|
|
|
|
{
|
|
|
|
copyonwrite();
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
cblas_zaxpy(nn, &CMONE, &a, 0, v, 1);
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
{
|
|
|
|
complex<double> *d=gpuputcomplex(a);
|
|
|
|
cublasZaxpy(nn, CUMONE, (cuDoubleComplex *)d, 0, (cuDoubleComplex *)v, 1);
|
|
|
|
gpufree(d);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec<double> & NRVec<double>::operator+=(const NRVec<double> &rhs)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
|
|
|
|
#endif
|
|
|
|
copyonwrite();
|
|
|
|
cblas_daxpy(nn, 1.0, rhs.v, 1, v, 1);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec< complex<double> > &
|
|
|
|
NRVec< complex<double> >::operator+=(const NRVec< complex<double> > &rhs)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
|
|
|
|
#endif
|
|
|
|
copyonwrite();
|
|
|
|
cblas_zaxpy(nn, &CONE, rhs.v, 1, v, 1);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec<double> & NRVec<double>::operator-=(const NRVec<double> &rhs)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
|
|
|
|
#endif
|
|
|
|
SAME_LOC(*this,rhs);
|
|
|
|
copyonwrite();
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(location==cpu)
|
|
|
|
#endif
|
|
|
|
cblas_daxpy(nn, -1.0, rhs.v, 1, v, 1);
|
|
|
|
#ifdef CUDALA
|
|
|
|
else
|
|
|
|
cublasDaxpy(nn, -1.0, rhs.v, 1, v, 1);
|
|
|
|
#endif
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec< complex<double> > &
|
|
|
|
NRVec< complex<double> >::operator-=(const NRVec< complex<double> > &rhs)
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
|
|
|
|
#endif
|
|
|
|
copyonwrite();
|
|
|
|
cblas_zaxpy(nn, &CMONE, rhs.v, 1, v, 1);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec<double> & NRVec<double>::operator*=(const double &a)
|
|
|
|
{
|
|
|
|
copyonwrite();
|
|
|
|
cblas_dscal(nn, a, v, 1);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
NRVec< complex<double> > &
|
|
|
|
NRVec< complex<double> >::operator*=(const complex<double> &a)
|
|
|
|
{
|
|
|
|
copyonwrite();
|
|
|
|
cblas_zscal(nn, &a, v, 1);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
const double NRVec<double>::operator*(const NRVec<double> &rhs) const
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("dot of incompatible vectors");
|
|
|
|
#endif
|
|
|
|
return cblas_ddot(nn, v, 1, rhs.v, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
const complex<double>
|
|
|
|
NRVec< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
|
|
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (nn != rhs.nn) laerror("dot of incompatible vectors");
|
|
|
|
#endif
|
|
|
|
complex<double> dot;
|
|
|
|
cblas_zdotc_sub(nn, v, 1, rhs.v, 1, &dot);
|
|
|
|
return dot;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sum of the absolute values of the elements
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
const double NRVec<double>::asum() const
|
|
|
|
{
|
|
|
|
return cblas_dasum(nn, v, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Dot product: x * y
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
const double NRVec<double>::dot(const double *y, const int stride) const
|
|
|
|
{
|
|
|
|
return cblas_ddot(nn, y, stride, v, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
const complex<double>
|
|
|
|
NRVec< complex<double> >::dot(const complex<double> *y, const int stride) const
|
|
|
|
{
|
|
|
|
complex<double> dot;
|
|
|
|
cblas_zdotc_sub(nn, y, stride, v, 1, &dot);
|
|
|
|
return dot;
|
|
|
|
}
|
|
|
|
|
|
|
|
// return norm of the Vec
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
const double NRVec<double>::norm() const
|
|
|
|
{
|
|
|
|
#ifdef CUDALA
|
|
|
|
if(location!=cpu) return cublasDnrm2(nn, v, 1);
|
|
|
|
#endif
|
|
|
|
return cblas_dnrm2(nn, v, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
const double NRVec< complex<double> >::norm() const
|
|
|
|
{
|
|
|
|
return cblas_dznrm2(nn, v, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Element selected by cblas_idamax (the one with the largest absolute value)
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
const double NRVec<double>::amax() const
|
|
|
|
{
|
|
|
|
return v[cblas_idamax(nn, v, 1)];
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
cblas_izamax seems to be missing at least in some cblas versions
|
|
|
|
template<>
|
|
|
|
inline
|
|
|
|
const complex<double> NRVec< complex<double> >::amax() const
|
|
|
|
{
|
|
|
|
return v[cblas_izamax(nn, v, 1)];
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
2009-11-12 22:01:19 +01:00
|
|
|
}//namespace
|
2004-03-17 04:07:21 +01:00
|
|
|
|
|
|
|
#endif /* _LA_VEC_H_ */
|