LA_library/vec.h

997 lines
23 KiB
C
Raw Normal View History

2008-02-26 14:55:23 +01:00
/*
LA: linear algebra C++ interface library
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
complex versions written by Roman Curik <roman.curik@jh-inst.cas.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2004-03-17 04:07:21 +01:00
#ifndef _LA_VEC_H_
#define _LA_VEC_H_
2005-02-14 01:10:07 +01:00
#include "la_traits.h"
2009-11-12 22:01:19 +01:00
namespace LA {
2004-03-17 04:07:21 +01:00
//////////////////////////////////////////////////////////////////////////////
// Forward declaration of the matrix/vector printing helper; only declared here,
// its definition is outside this file.
template <typename T> void lawritemat(FILE *file,const T *a,int r,int c,
const char *form0,int nodim,int modulo, int issym);
// Complex constants 1, -1 and 0 kept as named objects so that their address can
// be taken: the cblas z-routines used below receive alpha as a pointer
// (e.g. cblas_zaxpy(nn, &CONE, ...)).
const static complex<double> CONE = 1.0, CMONE = -1.0, CZERO = 0.0;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
// The same constants in the cublas complex type; these are passed by value
// to cublasZaxpy below.
const static cuDoubleComplex CUONE = {1.,0.}, CUMONE = {-1.,0.}, CUZERO = {0.,0.};
#endif
2004-03-17 04:07:21 +01:00
// Macros that build the binary operators +, -, * out of the compound
// assignments +=, -=, *=.
// NRVECMAT_OPER(E,X) defines, for the class NR<E><T> and the operator X:
//   - the member  `obj X a`  : a copy of *this modified by `X= a`;
//   - the free    `a X obj`  : a copy of obj modified by `X= a`.
// NOTE(review): the free overload evaluates `obj X= a`, i.e. it returns
// `obj X a`. That equals `a X obj` only when X is commutative; for `-` the
// operands end up swapped -- confirm whether any caller relies on this.
#define NRVECMAT_OPER(E,X) \
template<class T> \
inline const NR##E<T> NR##E<T>::operator X(const T &a) const \
{ return NR##E(*this) X##= a; } \
\
template<class T> \
inline const NR##E<T> operator X(const T &a, const NR##E<T> &rhs) \
{ return NR##E<T>(rhs) X##= a; }
// NRVECMAT_OPER2(E,X) defines the member `obj X other` for two objects of the
// same class NR<E><T> as a copy of *this modified by `X= other`.
#define NRVECMAT_OPER2(E,X) \
template<class T> \
2010-06-25 17:28:19 +02:00
inline const NR##E<T> NR##E<T>::operator X(const NR##E<T> &a) const \
2004-03-17 04:07:21 +01:00
{ return NR##E(*this) X##= a; }
2004-03-17 17:39:07 +01:00
2004-03-17 04:07:21 +01:00
// NRVec class
// NRVec<T>: vector of nn elements of type T stored in the array v.
// Storage is shared between copies: the copy constructor and operator= copy
// the pointers and increment *count, and copyonwrite() gives an object its own
// private copy of the data before the data is modified.
// With CUDALA defined, `location` records whether v was obtained with new[]
// (cpu) or with gpualloc().
template <typename T>
class NRVec {
protected:
// number of elements (returned by size())
int nn;
// element storage; 0 for a default-constructed vector
T *v;
// reference counter shared by all objects using the same v; 0 when unallocated
int *count;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
GPUID location;
#endif
2004-03-17 04:07:21 +01:00
public:
friend class NRSMat<T>;
friend class NRMat<T>;
2010-06-25 17:28:19 +02:00
// empty, unallocated vector
inline NRVec(): nn(0),v(0),count(0)
{
#ifdef CUDALA
location = DEFAULT_LOC;
#endif
};
// vector of n elements; the elements are not assigned any value here.
// With CUDALA, loc==undefined selects DEFAULT_LOC.
explicit inline NRVec(const int n, const GPUID loc= undefined) : nn(n), count(new int(1))
{
#ifdef CUDALA
if(loc==undefined) location = DEFAULT_LOC; else location = loc;
if(location==cpu)
#endif
v= new T[n];
#ifdef CUDALA
else
v= (T*) gpualloc(n*sizeof(T));
#endif
};
2004-03-17 04:07:21 +01:00
// n elements, each set to a
inline NRVec(const T &a, const int n);
2006-09-12 01:07:22 +02:00
// n elements copied from the array a
inline NRVec(const T *a, const int n);
// as above when skeleton is false; when skeleton is true the vector uses the
// caller's array a directly instead of copying it
inline NRVec(T *a, const int n, bool skeleton);
2004-03-17 04:07:21 +01:00
// shares the storage of rhs (no element copy)
inline NRVec(const NRVec &rhs);
2009-09-04 10:09:32 +02:00
NRVec(const typename LA_traits_complex<T>::NRVec_Noncomplex_type &rhs, bool imagpart=false); //construct complex from real
2004-03-17 04:07:21 +01:00
// shares the storage of the symmetric matrix S
inline explicit NRVec(const NRSMat<T> & S);
2005-11-20 14:46:00 +01:00
#ifdef MATPTR
explicit NRVec(const NRMat<T> &rhs) : NRVec(&rhs[0][0],rhs.nrows()*rhs.ncols()) {};
#else
2004-03-17 04:07:21 +01:00
explicit NRVec(const NRMat<T> &rhs);
2010-06-25 17:28:19 +02:00
#endif
#ifdef CUDALA
inline GPUID getlocation() const {return location;}
void moveto(const GPUID dest);
#else
// without CUDALA the data is always on the cpu and moveto() does nothing
inline GPUID getlocation() const {return cpu;}
void moveto(const GPUID dest) {};
2004-03-17 04:07:21 +01:00
#endif
// shares the storage of rhs, releasing the storage held so far
NRVec & operator=(const NRVec &rhs);
NRVec & operator=(const T &a); //assign a to every element
2009-09-04 10:09:32 +02:00
void randomize(const typename LA_traits<T>::normtype &x);
2004-03-17 04:07:21 +01:00
// assignment followed by copyonwrite(), i.e. with a private copy of the data
NRVec & operator|=(const NRVec &rhs);
2005-09-06 17:55:07 +02:00
const bool operator!=(const NRVec &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,nn);} //memcmp for scalars else elementwise
2005-02-14 01:10:07 +01:00
const bool operator==(const NRVec &rhs) const {return !(*this != rhs);};
2005-09-06 17:55:07 +02:00
const bool operator>(const NRVec &rhs) const;
const bool operator<(const NRVec &rhs) const;
const bool operator>=(const NRVec &rhs) const {return !(*this < rhs);};
const bool operator<=(const NRVec &rhs) const {return !(*this > rhs);};
2004-03-17 04:07:21 +01:00
const NRVec operator-() const;
inline NRVec & operator+=(const NRVec &rhs);
inline NRVec & operator-=(const NRVec &rhs);
2006-04-06 23:45:51 +02:00
inline NRVec & operator*=(const NRVec &rhs); //elementwise
inline NRVec & operator/=(const NRVec &rhs); //elementwise
2004-03-17 04:07:21 +01:00
inline NRVec & operator+=(const T &a);
inline NRVec & operator-=(const T &a);
inline NRVec & operator*=(const T &a);
// current value of the shared reference counter, 0 when unallocated
inline int getcount() const {return count?*count:0;}
inline const NRVec operator+(const NRVec &rhs) const;
inline const NRVec operator-(const NRVec &rhs) const;
inline const NRVec operator+(const T &a) const;
inline const NRVec operator-(const T &a) const;
inline const NRVec operator*(const T &a) const;
2005-02-18 23:08:15 +01:00
inline const T operator*(const NRVec &rhs) const; //scalar product -> dot
inline const T dot(const NRVec &rhs) const {return *this * rhs;}; //@@@for complex do conjugate
// gemv family: defined outside this header.
// NOTE(review): presumably *this = beta*(*this) + alpha*op(a)*x as in BLAS gemv -- confirm
void gemv(const T beta, const NRMat<T> &a, const char trans, const T alpha, const NRVec &x);
void gemv(const T beta, const NRSMat<T> &a, const char trans /*just for compatibility*/, const T alpha, const NRVec &x);
2006-04-06 23:45:51 +02:00
void gemv(const T beta, const SparseMat<T> &a, const char trans, const T alpha, const NRVec &x,const bool treat_as_symmetric=false);
2009-09-04 10:09:32 +02:00
void gemv(const typename LA_traits_complex<T>::Component_type beta, const typename LA_traits_complex<T>::NRMat_Noncomplex_type &a, const char trans, const typename LA_traits_complex<T>::Component_type alpha, const NRVec &x);
void gemv(const typename LA_traits_complex<T>::Component_type beta, const typename LA_traits_complex<T>::NRSMat_Noncomplex_type &a, const char trans, const typename LA_traits_complex<T>::Component_type alpha, const NRVec &x);
2010-06-25 17:28:19 +02:00
// vector * matrix, implemented as gemv with trans='t' into a result of mat.ncols() elements
const NRVec operator*(const NRMat<T> &mat) const {NRVec<T> result(mat.ncols(),mat.getlocation()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
const NRVec operator*(const NRSMat<T> &mat) const {NRVec<T> result(mat.ncols(),mat.getlocation()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
2005-02-18 23:08:15 +01:00
const NRVec operator*(const SparseMat<T> &mat) const {NRVec<T> result(mat.ncols()); result.gemv((T)0,mat,'t',(T)1,*this); return result;};
2009-11-12 22:01:19 +01:00
const NRMat<T> otimes(const NRVec<T> &rhs, const bool conjugate=false, const T &scale=1) const; //outer product
// shorthand for otimes(rhs,true)
inline const NRMat<T> operator|(const NRVec<T> &rhs) const {return otimes(rhs,true);};
2008-04-16 14:56:02 +02:00
inline const T sum() const {T sum=0; for(int i=0; i<nn; i++) sum += v[i]; return sum;}; //sum of its elements
inline const T asum() const; //sum of its elements absolute values
2004-03-17 04:07:21 +01:00
inline const T dot(const T *a, const int stride=1) const; // ddot with a stride-vector
inline T & operator[](const int i);
inline const T & operator[](const int i) const;
2010-01-11 11:12:28 +01:00
typedef T ROWTYPE;
inline void setcoldim(int i) {}; //dummy
2004-03-17 04:07:21 +01:00
inline int size() const;
inline operator T*(); //get a pointer to the data
inline operator const T*() const; //get a pointer to the data
// releases this object's reference; the storage is freed when the counter reaches 0
~NRVec();
void axpy(const T alpha, const NRVec &x); // this+= a*x
void axpy(const T alpha, const T *x, const int stride=1); // this+= a*x
// detach from shared storage: if *count > 1, make a private copy of the data
void copyonwrite();
2009-09-04 10:09:32 +02:00
void clear() {copyonwrite(); LA_traits<T>::clear(v,nn);}; //zero out
2004-03-17 04:07:21 +01:00
// change the number of elements to n; existing element values are not copied over
void resize(const int n);
2005-09-11 22:04:24 +02:00
void get(int fd, bool dimensions=1, bool transp=0);
void put(int fd, bool dimensions=1, bool transp=0) const;
2004-03-17 04:07:21 +01:00
NRVec & normalize();
2009-09-04 10:09:32 +02:00
inline const typename LA_traits<T>::normtype norm() const;
2004-03-17 04:07:21 +01:00
inline const T amax() const;
// normalize() applied to a copy of *this
inline const NRVec unitvector() const;
void fprintf(FILE *f, const char *format, const int modulo) const;
void fscanf(FILE *f, const char *format);
//sparse matrix concerning members
explicit NRVec(const SparseMat<T> &rhs); // dense from sparse matrix with one of dimensions =1
inline void simplify() {}; //just for compatibility with sparse ones
2006-04-01 14:58:57 +02:00
// element comparison and exchange by index.
// NOTE(review): swap() writes to v without calling copyonwrite() -- presumably the caller (sort) does; confirm
bool bigger(int i, int j) const {return LA_traits<T>::bigger(v[i],v[j]);};
bool smaller(int i, int j) const {return LA_traits<T>::smaller(v[i],v[j]);};
void swap(int i, int j) {T tmp; tmp=v[i]; v[i]=v[j]; v[j]=tmp;};
2006-04-01 16:56:35 +02:00
int sort(int direction=0, int from=0, int to= -1, int *perm=NULL); //sort, ascending by default, returns parity of permutation
2010-02-25 21:47:01 +01:00
// replace every element v[i] by _F(v[i]), after detaching from shared storage
NRVec & CallOnMe(T (*_F)(const T &) ) {copyonwrite(); for(int i=0; i<nn; ++i) v[i] = _F(v[i]); return *this;};
2004-03-17 04:07:21 +01:00
};
2009-11-12 22:01:19 +01:00
}//namespace
2006-04-01 14:58:57 +02:00
2005-02-18 23:08:15 +01:00
//due to mutual includes this has to be after full class declaration
#include "mat.h"
#include "smat.h"
#include "sparsemat.h"
2010-01-11 11:12:28 +01:00
#include "sparsesmat.h"
2005-02-18 23:08:15 +01:00
2010-06-25 17:28:19 +02:00
2009-11-12 22:01:19 +01:00
namespace LA {
2006-10-21 17:32:53 +02:00
// formatted I/O
template <typename T>
2009-11-12 22:01:19 +01:00
std::ostream & operator<<(std::ostream &s, const NRVec<T> &x)
2006-10-21 17:32:53 +02:00
{
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
if(x.getlocation()==cpu)
{
#endif
2006-10-21 17:32:53 +02:00
int i, n;
n = x.size();
2009-11-12 22:01:19 +01:00
s << n << std::endl;
2006-10-21 17:32:53 +02:00
for(i=0; i<n; i++) s << (typename LA_traits_io<T>::IOtype)x[i] << (i == n-1 ? '\n' : ' ');
return s;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
}
else
{
NRVec<T> tmp=x;
tmp.moveto(cpu);
return s<<tmp;
}
#endif
2006-10-21 17:32:53 +02:00
}
template <typename T>
2009-11-12 22:01:19 +01:00
std::istream & operator>>(std::istream &s, NRVec<T> &x)
2006-10-21 17:32:53 +02:00
{
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
if(x.getlocation()==cpu)
{
#endif
2006-10-21 17:32:53 +02:00
int i,n;
s >> n;
x.resize(n);
typename LA_traits_io<T>::IOtype tmp;
for(i=0; i<n; i++) {s >> tmp; x[i]=tmp;}
return s;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
}
else
{
NRVec<T> tmp;
tmp.moveto(cpu);
s >> tmp;
tmp.moveto(x.getlocation());
x=tmp;
return s;
}
#endif
2006-10-21 17:32:53 +02:00
}
2004-03-17 04:07:21 +01:00
// INLINES
// ctors
template <typename T>
2010-06-25 17:28:19 +02:00
inline NRVec<T>::NRVec(const T& a, const int n) : nn(n), count(new int)
2004-03-17 04:07:21 +01:00
{
*count = 1;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
location=DEFAULT_LOC;
if(location==cpu)
{
#endif
v = new T[n];
2004-03-17 04:07:21 +01:00
if(a != (T)0)
for(int i=0; i<n; i++)
v[i] = a;
else
memset(v, 0, nn*sizeof(T));
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
}
else
{
v= (T*) gpualloc(n*sizeof(T));
cublasSetVector(n,sizeof(T),&a,0,v,1);
}
#endif
2004-03-17 04:07:21 +01:00
}
2010-06-25 17:28:19 +02:00
2004-03-17 04:07:21 +01:00
template <typename T>
2006-09-12 01:07:22 +02:00
inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), count(new int)
2004-03-17 04:07:21 +01:00
{
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
location=DEFAULT_LOC;
if(location==cpu)
{
#endif
v=new T[n];
*count = 1;
memcpy(v, a, n*sizeof(T));
#ifdef CUDALA
}
else
{
v= (T*) gpualloc(n*sizeof(T));
cublasSetVector(n,sizeof(T),a,1,v,1);
}
#endif
2006-09-12 01:07:22 +02:00
}
// Construct a vector of n elements from the array a.
//   skeleton == false: the elements are copied into newly allocated storage.
//   skeleton == true : the vector uses the caller's array a directly. The
//                      counter starts at 2, so the destructor of this object
//                      alone (which frees only when the counter drops to 0)
//                      never deletes a.
// Fixes relative to the previous version:
//   - the counter is initialised on every path (it was left uninitialised when
//     a non-skeleton vector was placed on the GPU);
//   - with CUDALA, location is set before the skeleton branch tests it (it was
//     read uninitialised there).
template <typename T>
inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int(1))
{
#ifdef CUDALA
	location = DEFAULT_LOC;
#endif
	if(skeleton)
	{
#ifdef CUDALA
		if(location != cpu) laerror("NRVec() with skeleton option cannot be on GPU");
#endif
		*count = 2;
		v = a;
		return;
	}
#ifdef CUDALA
	if(location != cpu)
	{
		v = (T*) gpualloc(n*sizeof(T));
		cublasSetVector(n, sizeof(T), a, 1, v, 1);
		return;
	}
#endif
	v = new T[n];
	memcpy(v, a, n*sizeof(T));
}
// Copy constructor: no element is copied. The new object points to the same
// storage as rhs and the shared reference counter (if any) is incremented.
template <typename T>
inline NRVec<T>::NRVec(const NRVec<T> &rhs) : nn(rhs.nn), v(rhs.v), count(rhs.count)
{
#ifdef CUDALA
	location = rhs.location;
#endif
	if(count) ++(*count);
}
// Construct a vector that shares the storage of the symmetric matrix rhs
// (no element copy; the matrix's reference counter is incremented).
// The counter is only incremented when it exists, as in the copy constructor,
// so constructing from an unallocated matrix no longer dereferences a null
// pointer.
template <typename T>
inline NRVec<T>::NRVec(const NRSMat<T> &rhs)
{
#ifdef CUDALA
	location = rhs.location;
#endif
	// NN2 is a macro defined outside this file; presumably it expands, in terms
	// of nn, to the number of stored elements of the matrix -- confirm in smat.h
	nn = rhs.nn;
	nn = NN2;
	v = rhs.v;
	count = rhs.count;
	if(count) (*count)++;
}
2010-06-25 17:28:19 +02:00
// x +/-= a
2004-03-17 06:34:59 +01:00
// Add the scalar a to every element (generic, cpu-only implementation).
template <typename T>
inline NRVec<T> & NRVec<T>::operator+=(const T &a)
{
	NOT_GPU(*this);
	copyonwrite();	// obtain private storage before modifying it
	for(int k = 0; k < nn; ++k)
	{
		v[k] += a;
	}
	return *this;
}
// Subtract the scalar a from every element (generic, cpu-only implementation).
template <typename T>
inline NRVec<T> & NRVec<T>::operator-=(const T &a)
{
	NOT_GPU(*this);
	copyonwrite();	// obtain private storage before modifying it
	for(int k = 0; k < nn; ++k)
	{
		v[k] -= a;
	}
	return *this;
}
2004-03-17 04:07:21 +01:00
2010-06-25 17:28:19 +02:00
// x += x
2004-03-17 06:34:59 +01:00
// Elementwise this[k] += rhs[k] (generic implementation).
// The size and location checks are active in DEBUG builds only.
template <typename T>
inline NRVec<T> & NRVec<T>::operator+=(const NRVec<T> &rhs)
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
	NOT_GPU(*this);
	NOT_GPU(rhs);
#endif
	copyonwrite();
	// read rhs.v only after copyonwrite(): for x += x it may just have changed
	const T *addend = rhs.v;
	for(int k = 0; k < nn; ++k)
	{
		v[k] += addend[k];
	}
	return *this;
}
2006-04-06 23:45:51 +02:00
//for general type only
// Elementwise this[k] *= rhs[k] (generic implementation, used for all types).
// The loop reads and writes v on the host, so in DEBUG builds both operands
// are now checked with NOT_GPU, exactly as the sibling operators /=, += and -=
// already do.
template <typename T>
inline NRVec<T> & NRVec<T>::operator*=(const NRVec<T> &rhs)
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("*= of incompatible vectors");
	NOT_GPU(*this);
	NOT_GPU(rhs);
#endif
	copyonwrite();
	int i;
	for(i=0; i<nn; ++i) v[i]*=rhs.v[i];
	return *this;
}
// Elementwise this[k] /= rhs[k] (generic implementation).
// The size and location checks are active in DEBUG builds only.
template <typename T>
inline NRVec<T> & NRVec<T>::operator/=(const NRVec<T> &rhs)
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("/= of incompatible vectors");
	NOT_GPU(*this);
	NOT_GPU(rhs);
#endif
	copyonwrite();
	// read rhs.v only after copyonwrite(): for x /= x it may just have changed
	const T *divisor = rhs.v;
	for(int k = 0; k < nn; ++k)
	{
		v[k] /= divisor[k];
	}
	return *this;
}
2004-03-17 06:34:59 +01:00
2004-03-17 04:07:21 +01:00
// x -= x
2004-03-17 06:34:59 +01:00
// Elementwise this[k] -= rhs[k] (generic implementation).
// The size and location checks are active in DEBUG builds only.
template <typename T>
inline NRVec<T> & NRVec<T>::operator-=(const NRVec<T> &rhs)
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
	NOT_GPU(*this);
	NOT_GPU(rhs);
#endif
	copyonwrite();
	// read rhs.v only after copyonwrite(): for x -= x it may just have changed
	const T *subtrahend = rhs.v;
	for(int k = 0; k < nn; ++k)
	{
		v[k] -= subtrahend[k];
	}
	return *this;
}
2004-03-17 04:07:21 +01:00
// x *= a
2004-03-17 06:34:59 +01:00
// Multiply every element by the scalar a (generic, cpu-only implementation).
template <typename T>
inline NRVec<T> & NRVec<T>::operator*=(const T &a)
{
	NOT_GPU(*this);
	copyonwrite();	// obtain private storage before modifying it
	for(int k = 0; k < nn; ++k)
	{
		v[k] *= a;
	}
	return *this;
}
2004-03-17 04:07:21 +01:00
// scalar product x.y
2005-02-18 23:08:15 +01:00
// Scalar product: the sum over k of this[k]*rhs[k] (generic implementation,
// no conjugation). The size and location checks are active in DEBUG builds only.
template<typename T>
inline const T NRVec<T>::operator*(const NRVec<T> &rhs) const
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("dot of incompatible vectors");
	NOT_GPU(*this);
	NOT_GPU(rhs);
#endif
	T accum = 0;
	for(int k = 0; k < nn; ++k)
	{
		accum += v[k]*rhs.v[k];
	}
	return accum;
}
2005-02-18 23:08:15 +01:00
2004-03-17 04:07:21 +01:00
// x[i] returns i-th element
// Writable access to the i-th element.
// DEBUG builds verify, in this order: that the vector is allocated, that the
// storage is not shared (only when _LA_count_check is set), that i is in
// range, and that the data is not on the GPU.
// The allocation check now comes first: previously *count was dereferenced
// before it, so indexing an unallocated vector dereferenced a null counter
// instead of reporting the error.
template <typename T>
inline T & NRVec<T>::operator[](const int i)
{
#ifdef DEBUG
	if(!v || !count) laerror("[] on unallocated NRVec");
	if(_LA_count_check && *count != 1) laerror("possible lval [] with count > 1");
	if(i < 0 || i >= nn) laerror("NRVec out of range");
	NOT_GPU(*this);
#endif
	return v[i];
}
// Read-only access to the i-th element.
// DEBUG builds check the index range, that the vector is allocated and that
// the data is not on the GPU; release builds perform no checks.
template <typename T>
inline const T & NRVec<T>::operator[](const int i) const
{
#ifdef DEBUG
	const bool outside = (i < 0 || i >= nn);
	if(outside) laerror("NRVec out of range");
	if(!v) laerror("[] on unallocated NRVec");
	NOT_GPU(*this);
#endif
	return *(v + i);
}
// length of the vector
// Number of elements of the vector.
template <typename T>
inline int NRVec<T>::size() const
{
	const int length = nn;
	return length;
}
// reference Vec to the first element
// Conversion to a pointer to the first element (writable).
// DEBUG builds report an error for an unallocated vector.
template <typename T>
inline NRVec<T>::operator T*()
{
#ifdef DEBUG
	if(v == 0) laerror("unallocated NRVec in operator T*");
#endif
	T *data = v;
	return data;
}
// Conversion to a pointer to the first element (read-only).
// DEBUG builds report an error for an unallocated vector.
template <typename T>
inline NRVec<T>::operator const T*() const
{
#ifdef DEBUG
	if(v == 0) laerror("unallocated NRVec in operator T*");
#endif
	const T *data = v;
	return data;
}
// Make Vec unitvector
// Return the result of normalize() applied to a copy of this vector;
// *this itself is const here.
template <typename T>
inline const NRVec<T> NRVec<T>::unitvector() const
{
	NRVec<T> duplicate(*this);
	return duplicate.normalize();
}
// generate operators: Vec + a, a + Vec, Vec * a
NRVECMAT_OPER(Vec,+)
NRVECMAT_OPER(Vec,-)
NRVECMAT_OPER(Vec,*)
// generate operators: Vec + Vec, Vec - Vec
NRVECMAT_OPER2(Vec,+)
NRVECMAT_OPER2(Vec,-)
// Few forward declarations
2004-03-17 17:39:07 +01:00
//basic stuff which has to be in .h
// dtor
// Destructor: drop this object's reference. When the counter reaches zero the
// element storage (if any) is released with the deallocator matching its
// location, and the counter itself is deleted. Unallocated vectors (no
// counter) need no action.
template <typename T>
NRVec<T>::~NRVec()
{
	if(!count) return;
	if(--(*count) > 0) return;	// other objects still use the storage
	if(v)
	{
#ifdef CUDALA
		if(location != cpu) gpufree(v);
		else
#endif
		delete[] v;
	}
	delete count;
}
// detach from a physical vector and make own copy
// Detach from shared storage: if other objects reference the same data
// (*count > 1), give up one reference, allocate a new counter and a new array
// at the same location, and copy the elements into it. Does nothing when this
// object is already the only owner. Reports an error for an unallocated vector.
template <typename T>
void NRVec<T>::copyonwrite()
{
	if(!count) laerror("Vec::copyonwrite() of an undefined vector");
	if(*count <= 1) return;

	--(*count);
	count = new int(1);
	T *fresh;
#ifdef CUDALA
	if(location != cpu)
	{
		fresh = (T *) gpualloc(nn*sizeof(T));
		// the device copy is done in units of float, so sizeof(T) must be a multiple of it
		if(sizeof(T)%sizeof(float)!=0) laerror("cpu memcpy alignment problem");
		cublasScopy(nn*sizeof(T)/sizeof(float),(const float *) v,1,(float *)fresh,1);
	}
	else
#endif
	{
		fresh = new T[nn];
		memcpy(fresh, v, nn*sizeof(T));
	}
	v = fresh;
}
2010-06-25 17:28:19 +02:00
2004-03-17 17:39:07 +01:00
// Asignment
// Assignment: release the reference currently held (freeing the storage when
// it was the last one) and then share the storage of rhs, incrementing its
// reference counter. No element is copied. Self-assignment is a no-op.
template <typename T>
NRVec<T> & NRVec<T>::operator=(const NRVec<T> &rhs)
{
	if(this == &rhs) return *this;

	if(count && --(*count) == 0)
	{
#ifdef CUDALA
		if(location != cpu) gpufree(v);
		else
#endif
		delete[] v;
		delete count;
	}

	nn = rhs.nn;
	v = rhs.v;
	count = rhs.count;
#ifdef CUDALA
	location = rhs.location;
#endif
	if(count) ++(*count);
	return *this;
}
2010-06-25 17:28:19 +02:00
2004-03-17 17:39:07 +01:00
// Resize
// Change the number of elements to n. Element values are never carried over.
//   n == 0 on an allocated vector: the reference is released and the object
//           becomes unallocated;
//   storage shared with others    : the reference is released and new storage
//           of n elements is allocated;
//   unallocated vector            : storage of n elements is allocated;
//   sole owner                    : storage is reallocated only if n differs
//           from the current size.
// With CUDALA the (de)allocation follows the current location.
template <typename T>
void NRVec<T>::resize(const int n)
{
#ifdef DEBUG
	if(n<0) laerror("illegal vector dimension");
#endif
	if(count)
	{
		if(n == 0)
		{
			// give up our reference; free everything if it was the last one
			if(--(*count) <= 0)
			{
				if(v)
				{
#ifdef CUDALA
					if(location != cpu) gpufree(v);
					else
#endif
					delete[] v;
				}
				delete count;
			}
			count = 0;
			nn = 0;
			v = 0;
			return;
		}
		if(*count > 1)
		{
			// shared storage: detach and continue as an unallocated vector
			--(*count);
			count = 0;
			v = 0;
			nn = 0;
		}
	}

	if(!count)
	{
		count = new int(1);
		nn = n;
#ifdef CUDALA
		if(location != cpu) v = (T*) gpualloc(nn*sizeof(T));
		else
#endif
		v = new T[nn];
		return;
	}

	// here this object is the only owner of its storage
	if(n == nn) return;
	nn = n;
#ifdef CUDALA
	if(location != cpu)
	{
		gpufree(v);
		v = (T*) gpualloc(nn*sizeof(T));
		return;
	}
#endif
	delete[] v;
	v = new T[nn];
}
2006-04-06 23:45:51 +02:00
// assignment with a physical (deep) copy
2004-03-17 17:39:07 +01:00
// Assignment with a private copy of the data: ordinary (sharing) assignment
// followed by copyonwrite(). Self-assignment is a no-op.
// DEBUG builds report an error when rhs is unallocated.
template <typename T>
NRVec<T> & NRVec<T>::operator|=(const NRVec<T> &rhs)
{
#ifdef DEBUG
	if (!rhs.v) laerror("unallocated rhs in NRVec operator |=");
#endif
	if(this != &rhs)
	{
		*this = rhs;
		copyonwrite();
	}
	return *this;
}
2010-06-25 17:28:19 +02:00
2006-04-01 06:48:01 +02:00
// Return a complex vector of the same length whose elements are assigned from
// the corresponding real elements of rhs.
template<typename T>
NRVec<complex<T> > complexify(const NRVec<T> &rhs)
{
	NRVec<complex<T> > result(rhs.size());
	int k = 0;
	while(k < rhs.size())
	{
		result[k] = rhs[k];
		++k;
	}
	return result;
}
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
// Move the vector to the location dest. The location is always updated; data
// is transferred only when the vector is allocated. If the storage is shared
// with other objects, they keep the old storage: this object releases its
// reference and continues with its own copy and a new counter. If it was the
// only owner, the old storage is freed.
template<typename T>
void NRVec<T>::moveto(const GPUID dest)
{
	if(location == dest) return;
	CPU_GPU(location,dest);
	location = dest;

	if(v && !count) laerror("internal inconsistency of reference counting 1");
	if(!count) return;
	if(v && *count==0) laerror("internal inconsistency of reference counting 2");
	if(!v) return;

	T *const previous = v;
	const bool sole_owner = (*count == 1);
	if(dest == cpu)
	{
		// GPU -> CPU
		v = new T[nn];
		gpuget(nn, sizeof(T), previous, v);
		if(sole_owner) gpufree(previous);
	}
	else
	{
		// CPU -> GPU
		v = (T *) gpualloc(nn*sizeof(T));
		gpuput(nn, sizeof(T), previous, v);
		if(sole_owner) delete[] previous;
	}
	if(!sole_owner)
	{
		--(*count);
		count = new int(1);
	}
}
#endif
//some template specializations leading to BLAS/CUBLAS calls
// double specialization: add the scalar a to every element via daxpy with a
// source increment of 0 (the single value a is used for every element).
template<>
inline
NRVec<double> & NRVec<double>::operator+=(const double &a)
{
	copyonwrite();
#ifdef CUDALA
	if(location != cpu)
	{
		// NOTE(review): gpuputdouble presumably places a copy of a in device memory -- confirm
		double *dev_a = gpuputdouble(a);
		cublasDaxpy(nn, 1.0, dev_a, 0, v, 1);
		gpufree(dev_a);
		return *this;
	}
#endif
	cblas_daxpy(nn, 1.0, &a, 0, v, 1);
	return *this;
}
// complex<double> specialization: add the scalar a to every element via zaxpy
// with a source increment of 0 (the single value a is used for every element).
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator+=(const complex<double> &a)
{
	copyonwrite();
#ifdef CUDALA
	if(location != cpu)
	{
		// NOTE(review): gpuputcomplex presumably places a copy of a in device memory -- confirm
		complex<double> *dev_a = gpuputcomplex(a);
		cublasZaxpy(nn, CUONE, (cuDoubleComplex *)dev_a, 0, (cuDoubleComplex *)v, 1);
		gpufree(dev_a);
		return *this;
	}
#endif
	cblas_zaxpy(nn, &CONE, &a, 0, v, 1);
	return *this;
}
// double specialization: subtract the scalar a from every element via daxpy
// with alpha = -1 and a source increment of 0.
template<>
inline
NRVec<double> & NRVec<double>::operator-=(const double &a)
{
	copyonwrite();
#ifdef CUDALA
	if(location != cpu)
	{
		// NOTE(review): gpuputdouble presumably places a copy of a in device memory -- confirm
		double *dev_a = gpuputdouble(a);
		cublasDaxpy(nn, -1.0, dev_a, 0, v, 1);
		gpufree(dev_a);
		return *this;
	}
#endif
	cblas_daxpy(nn, -1.0, &a, 0, v, 1);
	return *this;
}
// complex<double> specialization: subtract the scalar a from every element via
// zaxpy with alpha = -1 and a source increment of 0.
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator-=(const complex<double> &a)
{
	copyonwrite();
#ifdef CUDALA
	if(location != cpu)
	{
		// NOTE(review): gpuputcomplex presumably places a copy of a in device memory -- confirm
		complex<double> *dev_a = gpuputcomplex(a);
		cublasZaxpy(nn, CUMONE, (cuDoubleComplex *)dev_a, 0, (cuDoubleComplex *)v, 1);
		gpufree(dev_a);
		return *this;
	}
#endif
	cblas_zaxpy(nn, &CMONE, &a, 0, v, 1);
	return *this;
}
// double specialization: this += rhs via daxpy.
// Brought in line with the sibling operator-=: the operands are checked with
// SAME_LOC and, with CUDALA, vectors that are not on the cpu are handled by
// cublasDaxpy. Previously cblas_daxpy was called unconditionally, i.e. also on
// pointers obtained from gpualloc().
template<>
inline
NRVec<double> & NRVec<double>::operator+=(const NRVec<double> &rhs)
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
	SAME_LOC(*this,rhs);
	copyonwrite();
#ifdef CUDALA
	if(location==cpu)
#endif
		cblas_daxpy(nn, 1.0, rhs.v, 1, v, 1);
#ifdef CUDALA
	else
		cublasDaxpy(nn, 1.0, rhs.v, 1, v, 1);
#endif
	return *this;
}
// complex<double> specialization: this += rhs via zaxpy.
// The operands are checked with SAME_LOC and, with CUDALA, vectors that are
// not on the cpu are handled by cublasZaxpy (same call form as the scalar
// operator+= of this type). Previously cblas_zaxpy was called unconditionally,
// i.e. also on pointers obtained from gpualloc().
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator+=(const NRVec< complex<double> > &rhs)
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
	SAME_LOC(*this,rhs);
	copyonwrite();
#ifdef CUDALA
	if(location==cpu)
#endif
		cblas_zaxpy(nn, &CONE, rhs.v, 1, v, 1);
#ifdef CUDALA
	else
		cublasZaxpy(nn, CUONE, (cuDoubleComplex *)rhs.v, 1, (cuDoubleComplex *)v, 1);
#endif
	return *this;
}
// double specialization: this -= rhs via daxpy with alpha = -1.
// Both operands are checked with SAME_LOC; with CUDALA the cublas routine is
// used when the data is not on the cpu.
template<>
inline
NRVec<double> & NRVec<double>::operator-=(const NRVec<double> &rhs)
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
	SAME_LOC(*this,rhs);
	copyonwrite();
#ifdef CUDALA
	if(location != cpu)
	{
		cublasDaxpy(nn, -1.0, rhs.v, 1, v, 1);
		return *this;
	}
#endif
	cblas_daxpy(nn, -1.0, rhs.v, 1, v, 1);
	return *this;
}
// complex<double> specialization: this -= rhs via zaxpy with alpha = -1.
// The operands are checked with SAME_LOC and, with CUDALA, vectors that are
// not on the cpu are handled by cublasZaxpy (same call form as the scalar
// operator-= of this type). Previously cblas_zaxpy was called unconditionally,
// i.e. also on pointers obtained from gpualloc().
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator-=(const NRVec< complex<double> > &rhs)
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("daxpy of incompatible vectors");
#endif
	SAME_LOC(*this,rhs);
	copyonwrite();
#ifdef CUDALA
	if(location==cpu)
#endif
		cblas_zaxpy(nn, &CMONE, rhs.v, 1, v, 1);
#ifdef CUDALA
	else
		cublasZaxpy(nn, CUMONE, (cuDoubleComplex *)rhs.v, 1, (cuDoubleComplex *)v, 1);
#endif
	return *this;
}
// double specialization: scale every element by a using dscal.
template<>
inline
NRVec<double> & NRVec<double>::operator*=(const double &a)
{
	copyonwrite();	// obtain private storage before scaling in place
	double *data = v;
	cblas_dscal(nn, a, data, 1);
	return *this;
}
// complex<double> specialization: scale every element by a using zscal
// (the scalar is passed by address).
template<>
inline
NRVec< complex<double> > &
NRVec< complex<double> >::operator*=(const complex<double> &a)
{
	copyonwrite();	// obtain private storage before scaling in place
	const complex<double> *factor = &a;
	cblas_zscal(nn, factor, v, 1);
	return *this;
}
// double specialization of the scalar product, computed by ddot.
// The size check is active in DEBUG builds only.
template<>
inline
const double NRVec<double>::operator*(const NRVec<double> &rhs) const
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("dot of incompatible vectors");
#endif
	const double product = cblas_ddot(nn, v, 1, rhs.v, 1);
	return product;
}
// complex<double> specialization of the scalar product, computed by zdotc
// (the BLAS routine that conjugates its first vector argument, here *this).
// The size check is active in DEBUG builds only.
template<>
inline
const complex<double>
NRVec< complex<double> >::operator*(const NRVec< complex<double> > &rhs) const
{
#ifdef DEBUG
	if (nn != rhs.nn) laerror("dot of incompatible vectors");
#endif
	complex<double> product;
	cblas_zdotc_sub(nn, v, 1, rhs.v, 1, &product);
	return product;
}
// Sum of elements
// Sum of the absolute values of the elements, computed by dasum.
template<>
inline
const double NRVec<double>::asum() const
{
	const double total = cblas_dasum(nn, v, 1);
	return total;
}
// Dot product: x * y
// Scalar product of this vector with nn elements taken from the array y at
// the given stride, computed by ddot.
template<>
inline
const double NRVec<double>::dot(const double *y, const int stride) const
{
	const double product = cblas_ddot(nn, y, stride, v, 1);
	return product;
}
// Scalar product of nn elements taken from the array y at the given stride
// with this vector, computed by zdotc (the BLAS routine that conjugates its
// first vector argument, here y).
template<>
inline
const complex<double>
NRVec< complex<double> >::dot(const complex<double> *y, const int stride) const
{
	complex<double> product;
	cblas_zdotc_sub(nn, y, stride, v, 1, &product);
	return product;
}
// return norm of the Vec
// Norm of a double vector, computed by dnrm2; with CUDALA the cublas routine
// is used when the data is not on the cpu.
template<>
inline
const double NRVec<double>::norm() const
{
#ifdef CUDALA
	if(location != cpu)
	{
		return cublasDnrm2(nn, v, 1);
	}
#endif
	const double length = cblas_dnrm2(nn, v, 1);
	return length;
}
// Norm of a complex<double> vector (a real number), computed by dznrm2.
template<>
inline
const double NRVec< complex<double> >::norm() const
{
	const double length = cblas_dznrm2(nn, v, 1);
	return length;
}
// Max element of the array
// Return the element selected by idamax, i.e. the element of largest absolute
// value; the element itself is returned, with its sign.
template<>
inline
const double NRVec<double>::amax() const
{
	const double *data = v;
	return data[cblas_idamax(nn, data, 1)];
}
/*
cblas_izamax seems to be missing at least in some cblas versions
template<>
inline
const complex<double> NRVec< complex<double> >::amax() const
{
return v[cblas_izamax(nn, v, 1)];
}
*/
2009-11-12 22:01:19 +01:00
}//namespace
2004-03-17 04:07:21 +01:00
#endif /* _LA_VEC_H_ */