1296 lines
45 KiB
C++
1296 lines
45 KiB
C++
//------------------------------------------------------------------------------
|
|
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
|
|
//------------------------------------------------------------------------------
|
|
/*
|
|
LA: linear algebra C++ interface library
|
|
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
|
complex versions written by Roman Curik <roman.curik@jh-inst.cas.cz>
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#ifndef _LA_SMAT_H_
|
|
#define _LA_SMAT_H_
|
|
#include "la_traits.h"
|
|
|
|
namespace LA {
|
|
#define NN2 ((size_t)nn*(nn+1)/2)
|
|
|
|
//forward declaration
|
|
template<typename T> class NRPerm;
|
|
template<typename T> class NRSMat_from1;
|
|
|
|
|
|
/***************************************************************************//**
|
|
* This class implements a general symmetric or hermitian matrix the elements
|
|
* of which are stored in packed form. Particularly the lower triangular part
|
|
* of a symmetric or hermitian matrix of order \f$N\f$ is interpreted as a
|
|
* vector of length \f$N(N+1)/2\f$ in row-major storage scheme.
|
|
******************************************************************************/
|
|
template <class T>
|
|
class NRSMat{
|
|
protected:
|
|
int nn;//!< number of rows/columns of this symmetric matrix
|
|
T *v;//!< internal pointer to the underlying data structure
|
|
int *count;//!< pointer to the reference counter
|
|
#ifdef CUDALA
|
|
GPUID location;//!< specification of memory (GPU/CPU) location where this objects resides
|
|
#endif
|
|
public:
|
|
friend class NRVec<T>;
|
|
friend class NRMat<T>;
|
|
|
|
~NRSMat();
|
|
|
|
//! default constructor of null-matrix
|
|
inline NRSMat() : nn(0),v(0),count(0) {
|
|
#ifdef CUDALA
|
|
location = DEFAULT_LOC;
|
|
#endif
|
|
};
|
|
|
|
//! default constructor
|
|
inline explicit NRSMat(const int n, const GPUID loc = undefined);
|
|
|
|
//! constructor initializing the matrix being created by given scalar value
|
|
inline NRSMat(const T &a, const int n);
|
|
|
|
//! constructor initializing the matrix being created by data located at given memory position
|
|
inline NRSMat(const T *a, const int n);
|
|
|
|
//! copy constructor
|
|
inline NRSMat(const NRSMat &rhs);
|
|
|
|
//! constructor converting real matrix to its complex counterpart
|
|
NRSMat(const typename LA_traits_complex<T>::NRSMat_Noncomplex_type &rhs, bool imagpart = false);
|
|
|
|
//! constructor creating symmetric part of a general matrix
|
|
explicit NRSMat(const NRMat<T> &rhs);
|
|
|
|
//! construct symmetric matrix by filling the lower triangle with data stored in a vector
|
|
explicit NRSMat(const NRVec<T> &rhs, const int n);
|
|
|
|
//! assignment operator performing shallow copy
|
|
NRSMat & operator=(const NRSMat &rhs);
|
|
|
|
//! assignment operator performing deep copy
|
|
NRSMat & operator|=(const NRSMat &rhs);
|
|
|
|
//! fill the matrix with pseudorandom numbers (uniform distribution)
|
|
void randomize(const typename LA_traits<T>::normtype &x);
|
|
|
|
//! assign scalar value to diagonal elements
|
|
NRSMat & operator=(const T &a);
|
|
|
|
//! unit matrix for general power
|
|
void identity() {*this = (T)1;}
|
|
|
|
//! inverse matrix
|
|
NRSMat inverse();
|
|
|
|
//! conjugate a matrix
|
|
NRSMat& conjugateme();
|
|
const NRSMat conjugate() const {NRSMat r(*this); r.conjugateme(); return r;};
|
|
|
|
|
|
//! permute matrix elements
|
|
const NRSMat permuted(const NRPerm<int> &p, const bool inverse=false) const;
|
|
|
|
inline int getcount() const {return count?*count:0;}
|
|
|
|
#ifdef CUDALA
|
|
inline GPUID getlocation() const {return location;}
|
|
void moveto(const GPUID dest);
|
|
#else
|
|
inline GPUID getlocation() const {return cpu;}
|
|
void moveto(const GPUID dest) {};
|
|
#endif
|
|
|
|
//! relational operator for testing nonequality
|
|
const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,NN2);};
|
|
//! relational operator for testing equality
|
|
const bool operator==(const NRSMat &rhs) const {return !(*this != rhs);};
|
|
|
|
inline NRSMat & operator*=(const T &a);
|
|
inline NRSMat & operator+=(const T &a);
|
|
inline NRSMat & operator-=(const T &a);
|
|
inline NRSMat & operator+=(const NRSMat &rhs);
|
|
inline NRSMat & operator-=(const NRSMat &rhs);
|
|
const NRSMat operator-() const;
|
|
|
|
inline const NRSMat operator*(const T &a) const;
|
|
inline const NRSMat operator+(const T &a) const;
|
|
inline const NRSMat operator-(const T &a) const;
|
|
inline const NRSMat operator+(const NRSMat &rhs) const;
|
|
inline const NRSMat operator-(const NRSMat &rhs) const;
|
|
|
|
inline const NRMat<T> operator+(const NRMat<T> &rhs) const;
|
|
inline const NRMat<T> operator-(const NRMat<T> &rhs) const;
|
|
const NRMat<T> operator*(const NRSMat &rhs) const;
|
|
const NRMat<T> operator*(const NRMat<T> &rhs) const;
|
|
|
|
const T dot(const NRSMat &rhs) const;
|
|
const T dot(const NRVec<T> &rhs) const;
|
|
|
|
const NRVec<T> operator*(const NRVec<T> &rhs) const {NRVec<T> result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
|
|
const NRVec<std::complex<T> > operator*(const NRVec<std::complex<T> > &rhs) const {NRVec<std::complex<T> > result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
|
|
|
|
const T* diagonalof(NRVec<T> &, const bool divide = 0, bool cache = false) const;
|
|
|
|
void gemv(const T beta, NRVec<T> &r, const char trans, const T alpha, const NRVec<T> &x) const {r.gemv(beta,*this,trans,alpha,x);};
|
|
void gemv(const T beta, NRVec<std::complex<T> > &r, const char trans, const T alpha, const NRVec<std::complex<T> > &x) const {r.gemv(beta,*this,trans,alpha,x);};
|
|
|
|
inline const T& operator[](const size_t ij) const;
|
|
inline T& operator[](const size_t ij);
|
|
|
|
inline const T& operator()(const int i, const int j) const;
|
|
inline T& operator()(const int i, const int j);
|
|
|
|
inline int nrows() const;
|
|
inline int ncols() const;
|
|
inline size_t size() const; //NOTE it is the size of data n*(n+1)/2, not nrows
|
|
|
|
inline bool transp(const int i, const int j) const {return i>j;} //this can be used for compact storage of matrices, which are actually not symmetric, but one triangle of them is redundant
|
|
const typename LA_traits<T>::normtype norm(const T scalar = (T)0) const;
|
|
void axpy(const T alpha, const NRSMat &x); // this+= a*x
|
|
|
|
inline const T amax() const;
|
|
inline const T amin() const;
|
|
|
|
const T trace() const;
|
|
void get(int fd, bool dimensions = 1, bool transp = 0);
|
|
void put(int fd, bool dimensions = 1, bool transp = 0) const;
|
|
|
|
void copyonwrite(bool detachonly=false, bool deep=true);
|
|
|
|
void clear() {copyonwrite(LA_traits<T>::is_plaindata()); LA_traits<T>::clear(v,NN2);}; //zero out
|
|
void resize(const int n);
|
|
void dealloc(void) {resize(0);}
|
|
|
|
//! extract specified submatrix
|
|
const NRSMat submatrix(const int from, const int to) const;
|
|
const NRSMat submatrix(const NRVec<int> &selection) const;
|
|
|
|
//! store given matrix at given position into the current matrix
|
|
void storesubmatrix(const int from, const NRSMat &rhs);
|
|
void storesubmatrix(const NRVec<int> &selection, const NRSMat &rhs);
|
|
|
|
|
|
inline operator T*();
|
|
inline operator const T*() const;
|
|
void fprintf(FILE *f, const char *format, const int modulo) const;
|
|
void fscanf(FILE *f, const char *format);
|
|
//members concerning sparse matrix
|
|
explicit NRSMat(const SparseMat<T> &rhs); // dense from sparse
|
|
explicit NRSMat(const SparseSMat<T> &rhs); // dense from sparse
|
|
inline void simplify() {}; //just for compatibility with sparse ones
|
|
bool issymmetric() const {return 1;}
|
|
void swap(NRSMat &rhs) //more efficient swap than via tmp and constructors and operator=
|
|
{
|
|
int tmpnn=nn; nn=rhs.nn; rhs.nn=tmpnn;
|
|
T *tmpv=v; v=rhs.v; rhs.v=tmpv;
|
|
int *tmpcount=count; count=rhs.count; rhs.count=tmpcount;
|
|
#ifdef CUDALA
|
|
GPUID tmplocation=location; location=rhs.location; rhs.location=tmplocation;
|
|
#endif
|
|
|
|
}
|
|
NRSMat<typename LA_traits<T>::normtype> diffabs(const NRSMat &rhs) const; //difference of absolute values
|
|
NRSMat<typename LA_traits<T>::normtype> abs() const; //elementwise absolute values
|
|
};
|
|
|
|
}//namespace
|
|
//due to mutual includes this has to be after full class declaration
|
|
#include "vec.h"
|
|
#include "mat.h"
|
|
#include "permutation.h"
|
|
|
|
namespace LA {
|
|
|
|
/***************************************************************************//**
|
|
* constructor of a symmetric matrix stored in packed form
|
|
* @param[in] n number of rows of the matrix being created
|
|
* @param[in] loc location for storing the matrix
|
|
* @see count, v, location
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T>::NRSMat(const int n, const GPUID loc): nn(n), count(new int(1)) {
|
|
#ifdef CUDALA
|
|
location = (loc == undefined?DEFAULT_LOC:loc);
|
|
if(location == cpu){
|
|
#endif
|
|
v = new T[NN2];
|
|
#ifdef CUDALA
|
|
}else{
|
|
v = (T*) gpualloc(NN2*sizeof(T));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* constructor of a symmetric matrix stored in packed form (default location in used)
|
|
* @param[in] a set all matrix elements equal to this value
|
|
* @param[in] n number of rows of the matrix being created
|
|
* @see count, v, location, NRSMat<T>::NRSMat(const int, const GPUID)
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T>::NRSMat(const T& a, const int n) : nn(n), count(new int(1)) {
|
|
#ifdef CUDALA
|
|
location = DEFAULT_LOC;
|
|
if(location == cpu){
|
|
#endif
|
|
v = new T[NN2];
|
|
if(!LA_traits<T>::is_plaindata() || a != (T)0) for(register size_t i = 0; i<NN2; i++) v[i] = a;
|
|
else memset(v, 0, NN2*sizeof(T));
|
|
|
|
#ifdef CUDALA
|
|
}else{
|
|
v = (T*) gpualloc(NN2*sizeof(T));
|
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
|
cublasSetVector(NN2, sizeof(T), &a, 0, v, 1);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* constructor of a symmetric matrix stored in packed form (default location in used)
|
|
* @param[in] a pointer to data of type T used for matrix inicialization
|
|
* @param[in] n number of rows of the matrix being created
|
|
* @see count, v, location, NRSMat<T>::NRSMat(const int, const GPUID), NRSMat<T>::NRSMat(const T&, const int)
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T>::NRSMat(const T *a, const int n) : nn(n), count(new int(1)) {
|
|
#ifdef CUDALA
|
|
location = DEFAULT_LOC;
|
|
if(location == cpu){
|
|
#endif
|
|
if(LA_traits<T>::is_plaindata()) memcpy(v, a, NN2*sizeof(T));
|
|
else for( int i=0; i<NN2; i++) v[i] = a[i];
|
|
#ifdef CUDALA
|
|
}else{
|
|
v = (T*) gpualloc(NN2*sizeof(T));
|
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
|
cublasSetVector(NN2, sizeof(T), a, 1, v, 1);
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* copy constructor implementing shallow copy
|
|
* @param[in] rhs reference matrix being copied
|
|
* @see count, v, location
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T>::NRSMat(const NRSMat<T> &rhs) {
|
|
#ifdef CUDALA
|
|
location = rhs.location;
|
|
#endif
|
|
v = rhs.v;
|
|
nn = rhs.nn;
|
|
count = rhs.count;
|
|
if(count) (*count)++;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* constructor interpreting a vector of \f$n(n+1)/2\f$ elements as a symmetric
|
|
* matrix stored in packed form having \f$n\f$ rows
|
|
* @param[in] rhs reference matrix being copied
|
|
* @param[in] n count of rows of the matrix being created
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRSMat<T>::NRSMat(const NRVec<T> &rhs, const int n) {
|
|
#ifdef CUDALA
|
|
location = rhs.location;
|
|
#endif
|
|
nn = n;
|
|
#ifdef DEBUG
|
|
if(NN2 != rhs.size()){ laerror("incompatible dimensions in NRSMat<T>::NRSMat(const NRVec<T>&, const int)"); }
|
|
#endif
|
|
count = rhs.count;
|
|
v = rhs.v;
|
|
(*count)++;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* multiply this real symmetric matrix with real scalar value
|
|
* @param[in] a real multiplicative factor
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template<>
|
|
inline NRSMat<double> & NRSMat<double>::operator*=(const double &a) {
|
|
copyonwrite();
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
cblas_dscal(NN2, a, v, 1);
|
|
#ifdef CUDALA
|
|
}else{
|
|
cublasDscal(NN2, a, v, 1);
|
|
TEST_CUBLAS("cublasDscal");//"NRSMat<double>& NRSMat<double>::operator*=(const double &)"
|
|
}
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* multiply this complex symmetric matrix with complex scalar value
|
|
* @param[in] a complex multiplicative factor
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template<>
|
|
inline NRSMat<std::complex<double> > &
|
|
NRSMat<std::complex<double> >::operator*=(const std::complex<double> &a) {
|
|
copyonwrite();
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
cblas_zscal(NN2, &a, v, 1);
|
|
#ifdef CUDALA
|
|
}else{
|
|
const cuDoubleComplex _a = make_cuDoubleComplex(a.real(), a.imag());
|
|
cublasZscal(NN2, _a, (cuDoubleComplex*)v, 1);
|
|
TEST_CUBLAS("cublasZscal");//"NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator*=(const std::complex<double> &)"
|
|
}
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* multiply this symmetric matrix of general type <code>T</code> stored in packed form
|
|
* with scalar value of type <code>T</code>
|
|
* @param[in] a multiplicative factor of type <code>T</code>
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T> & NRSMat<T>::operator*=(const T &a) {
|
|
NOT_GPU(*this);
|
|
|
|
copyonwrite();
|
|
for(register size_t i = 0; i<NN2; ++i) v[i] *= a;
|
|
return *this;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* add a scalar value \f$\alpha\f$ of general type <code>T</code> to the diagonal elements of this symmetric matrix of type <code>T</code>
|
|
* @param[in] a scalar value \f$\alpha\f$
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T> & NRSMat<T>::operator+=(const T &a) {
|
|
NOT_GPU(*this);
|
|
|
|
copyonwrite();
|
|
for(register int i = 0; i<nn; i++) v[i*(size_t)(i+1)/2 + i] += a;
|
|
return *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* subtract a scalar value \f$\alpha\f$ of general type <code>T</code> from the
|
|
* diagonal elements of this symmetric matrix of type <code>T</code>
|
|
* @param[in] a scalar value \f$\alpha\f$
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T> & NRSMat<T>::operator-=(const T &a) {
|
|
NOT_GPU(*this);
|
|
|
|
copyonwrite();
|
|
for(register int i = 0; i<nn; i++) v[i*(size_t)(i+1)/2+i] -= a;
|
|
return *this;
|
|
}
|
|
|
|
/*difference of absolute values*/
|
|
template <typename T>
|
|
NRSMat<typename LA_traits<T>::normtype> NRSMat<T>::diffabs(const NRSMat<T> &rhs) const {
|
|
#ifdef DEBUG
|
|
if (nn != rhs.nn) laerror("incompatible dimensions");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
NOT_GPU(rhs);
|
|
|
|
NRSMat<typename LA_traits<T>::normtype> r(nn);
|
|
for(int i=0; i<nn; ++i) for(int j=0; j<=i; ++j) r(i,j) = MYABS((*this)(i,j)) - MYABS(rhs(i,j));
|
|
return r;
|
|
}
|
|
|
|
/*elementwise absolute values*/
|
|
template <typename T>
|
|
NRSMat<typename LA_traits<T>::normtype> NRSMat<T>::abs() const {
|
|
NOT_GPU(*this);
|
|
|
|
NRSMat<typename LA_traits<T>::normtype> r(nn);
|
|
for(int i=0; i<nn; ++i) for(int j=0; j<=i; ++j) r(i,j) = MYABS((*this)(i,j));
|
|
return r;
|
|
}
|
|
|
|
|
|
|
|
/***************************************************************************//**
|
|
* add up this real symmetric matrix with given symmetric matrix
|
|
* @param[in] rhs real symmetric matrix to be added
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template<>
|
|
inline NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> & rhs) {
|
|
#ifdef DEBUG
|
|
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> &)");
|
|
#endif
|
|
SAME_LOC(*this, rhs);
|
|
copyonwrite();
|
|
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
cblas_daxpy(NN2, 1.0, rhs.v, 1, v, 1);
|
|
#ifdef CUDALA
|
|
}else{
|
|
cublasDaxpy(NN2, 1.0, rhs.v, 1, v, 1);
|
|
TEST_CUBLAS("cublasDaxpy");//" NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> &)"
|
|
}
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* add up this complex symmetric matrix with given symmetric matrix
|
|
* @param[in] rhs complex symmetric matrix to be added
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template<>
|
|
inline NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator+=(const NRSMat<std::complex<double> > & rhs) {
|
|
#ifdef DEBUG
|
|
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator+=(const NRSMat<std::complex<double> > &)");
|
|
#endif
|
|
SAME_LOC(*this, rhs);
|
|
copyonwrite();
|
|
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
cblas_zaxpy(NN2, &CONE, rhs.v, 1, v, 1);
|
|
#ifdef CUDALA
|
|
}else{
|
|
cublasZaxpy(NN2, CUONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1);
|
|
TEST_CUBLAS("cublasZaxpy");//"NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator+=(const NRSMat<std::complex<double> > &)"
|
|
}
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* add up this symmetric matrix of general type <code>T</code> with given symmetric matrix
|
|
* @param[in] rhs complex matrix of general type <code>T</code> to be added
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T>& NRSMat<T>::operator+=(const NRSMat<T>& rhs) {
|
|
#ifdef DEBUG
|
|
if(nn != rhs.nn) laerror("incompatible NRSMat<T>& NRSMat<T>::operator+=(const NRSMat<T> &)");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
SAME_LOC(*this, rhs);
|
|
|
|
copyonwrite();
|
|
for(register size_t i = 0; i<NN2; ++i) v[i] += rhs.v[i];
|
|
return *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* subtracts given real symmetric matrix from this real symmetric matrix
|
|
* @param[in] rhs real symmetric matrix to be subtracted
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template<>
|
|
inline NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double>& rhs) {
|
|
#ifdef DEBUG
|
|
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double> &)");
|
|
#endif
|
|
SAME_LOC(*this, rhs);
|
|
copyonwrite();
|
|
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
cblas_daxpy(NN2, -1.0, rhs.v, 1, v, 1);
|
|
#ifdef CUDALA
|
|
}else{
|
|
cublasDaxpy(NN2, -1.0, rhs.v, 1, v, 1);
|
|
TEST_CUBLAS("cublasDaxpy");//" NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double> &)"
|
|
}
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* subtracts given complex symmetric matrix from this complex symmetric matrix
|
|
* @param[in] rhs complex symmetric matrix to be subtracted
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template<>
|
|
inline NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator-=(const NRSMat<std::complex<double> >& rhs) {
|
|
#ifdef DEBUG
|
|
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator-=(const NRSMat<std::complex<double> > &)");
|
|
#endif
|
|
SAME_LOC(*this, rhs);
|
|
copyonwrite();
|
|
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
cblas_zaxpy(NN2, &CMONE, rhs.v, 1, v, 1);
|
|
#ifdef CUDALA
|
|
}else{
|
|
cublasZaxpy(NN2, CUMONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1);
|
|
TEST_CUBLAS("cublasZaxpy");//"NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator-=(const NRSMat<std::complex<double> > &)"
|
|
}
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* subtracts given symmetric matrix of general type <code>T</code> from this symmetric matrix of type <code>T</code>
|
|
* @param[in] rhs symmetric matrix of general type <code>T</code> to be subtracted
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T>& NRSMat<T>::operator-=(const NRSMat<T>& rhs) {
|
|
#ifdef DEBUG
|
|
if(nn != rhs.nn) laerror("incompatible NRSMat<T>& NRSMat<T>::operator-=(const NRSMat<T> &)");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
copyonwrite();
|
|
|
|
for(register size_t i = 0; i<NN2; ++i) v[i] -= rhs.v[i];
|
|
return *this;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* add up given dense matrix of general type <code>T</code> with this symmetric matrix of type <code>T</code>
|
|
* @param[in] rhs dense matrix of type <code>T</code> to be added
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline const NRMat<T> NRSMat<T>::operator+(const NRMat<T> &rhs) const {
|
|
return NRMat<T>(rhs) += *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* subtracts given dense matrix of general type <code>T</code> from this symmetric matrix of type <code>T</code>
|
|
* @param[in] rhs dense matrix of type <code>T</code> to be added
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline const NRMat<T> NRSMat<T>::operator-(const NRMat<T> &rhs) const {
|
|
return NRMat<T>(-rhs) += *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine matrix element of this symmetric matrix of general type <code>T</code>
|
|
* using cumulative index increasing in a row-major way and corresponding to the
|
|
* lower triangular part of the respective dense matrix
|
|
* @param[in] ij index of the requested element
|
|
* @return reference to the corresponding matrix element
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline T& NRSMat<T>::operator[](const size_t ij) {
|
|
#ifdef DEBUG
|
|
if(_LA_count_check && *count != 1) laerror("T& NRSMat<T>::operator[] used for matrix with count>1");
|
|
if(ij<0 || ij>=NN2) laerror("T& NRSMat<T>::operator[] out of range");
|
|
if(!v) laerror("T& NRSMat<T>::operator[] used for unallocated NRSmat<T> object");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
|
|
return v[ij];
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine matrix element of this symmetric matrix of general type <code>T</code>
|
|
* using cumulative index increasing in a row-major way and corresponding to the
|
|
* lower triangular part of the respective dense matrix, i.e. \f$A_{i,j}\f$ for
|
|
* \f$N>i\geq{}j\geq0\f$ corresponds to cumulative index \f$i(i+1)/2+j\f$
|
|
* @param[in] ij index of the requested element
|
|
* @return constant reference to the corresponding matrix element
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline const T & NRSMat<T>::operator[](const size_t ij) const {
|
|
#ifdef DEBUG
|
|
if(ij<0 || ij>=NN2) laerror("T& NRSMat<T>::operator[] out of range");
|
|
if(!v) laerror("T& NRSMat<T>::operator[] used for unallocated NRSmat<T> object");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
|
|
return v[ij];
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
|
|
* where \f$0\leq{}i,j<N\f$
|
|
* @param[in] i row index
|
|
* @param[in] i column index
|
|
* @return cumulative index
|
|
******************************************************************************/
|
|
template<typename T>
|
|
inline size_t SMat_index(T i, T j) {
|
|
return (i>=j) ? i*(size_t)(i+1)/2+j : j*(size_t)(j+1)/2+i;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
|
|
* where \f$0\leq{}i,j<N\f$ for special case \f$i\geq{}j\f$
|
|
* @param[in] i row index
|
|
* @param[in] i column index
|
|
* @return cumulative index
|
|
******************************************************************************/
|
|
template<typename T>
|
|
inline size_t SMat_index_igej(T i, T j) {
|
|
return i*(size_t)(i+1)/2+j;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
|
|
* where \f$0\leq{}i,j<N\f$ for special case \f$i\leq{}j\f$
|
|
* @param[in] i row index
|
|
* @param[in] i column index
|
|
* @return cumulative index
|
|
******************************************************************************/
|
|
template<typename T>
|
|
inline size_t SMat_index_ilej(T i, T j) {
|
|
return j*(size_t)(j+1)/2+i;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
|
|
* where \f$1\leq{}i,j\leq{}N\f$
|
|
* @param[in] i row index
|
|
* @param[in] i column index
|
|
* @return cumulative index
|
|
******************************************************************************/
|
|
template<typename T>
|
|
inline size_t SMat_index_1(T i, T j) {
|
|
return (i>=j)? i*(size_t)(i-1)/2+j-1 : j*(size_t)(j-1)/2+i-1;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
|
|
* where \f$1\leq{}i,j\leq{}N\f$ for special case \f$i\geq{}j\f$
|
|
* @param[in] i row index
|
|
* @param[in] i column index
|
|
* @return cumulative index
|
|
******************************************************************************/
|
|
template<typename T>
|
|
inline size_t SMat_index_1igej(T i, T j) {
|
|
return i*(size_t)(i-1)/2+j-1;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
|
|
* where \f$1\leq{}i,j\leq{}N\f$ for special case \f$i\leq{}j\f$
|
|
* @param[in] i row index
|
|
* @param[in] i column index
|
|
* @return cumulative index
|
|
******************************************************************************/
|
|
template<typename T>
|
|
inline size_t SMat_index_1ilej(T i, T j) {
|
|
return j*(size_t)(j-1)/2+i-1;
|
|
}
|
|
|
|
//indexing for antisymmetric matrix (used by fourindex)
|
|
|
|
template<typename T>
|
|
inline size_t ASMat_index(T i, T j)
|
|
{
|
|
if(i == j) return -1;
|
|
return (i>j) ? i*(size_t)(i-1)/2+j : j*(size_t)(j-1)/2+i;
|
|
}
|
|
|
|
template<typename T>
|
|
inline size_t ASMat_index_1(T i, T j)
|
|
{
|
|
if(i == j) return -1;
|
|
return (i>j)? (i-2)*(i-1)/2+j-1 : (j-2)*(j-1)/2+i-1;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine matrix element of this symmetric matrix of general type <code>T</code>
|
|
* @param[in] i row index running from 0
|
|
* @param[in] j column index running from 0
|
|
* @return reference to the corresponding matrix element
|
|
* @see count, SMat_index, NRSMat<T>::operator[]
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline T & NRSMat<T>::operator()(const int i, const int j) {
|
|
#ifdef DEBUG
|
|
if(_LA_count_check && *count != 1) laerror("T & NRSMat<T>::operator()(const int, const int) used for matrix with count > 1");
|
|
if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat<T>::operator()(const int, const int) out of range");
|
|
if(!v) laerror("T & NRSMat<T>::operator()(const int, const int) used for unallocated NRSmat<T> object");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
|
|
return v[SMat_index(i,j)];
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* determine matrix element of this symmetric matrix of general type <code>T</code>
|
|
* @param[in] i row index running from 0
|
|
* @param[in] j column index running from 0
|
|
* @return constant reference to the corresponding matrix element
|
|
* @see count, SMat_index, NRSMat<T>::operator[]
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline const T & NRSMat<T>::operator()(const int i, const int j) const {
|
|
#ifdef DEBUG
|
|
if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat<T>::operator()(const int, const int) out of range");
|
|
if(!v) laerror("T & NRSMat<T>::operator()(const int, const int) used for unallocated NRSmat<T> object");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
|
|
return v[SMat_index(i,j)];
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return number of rows of this symmetric matrix of generalt type <code>T</code>
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline int NRSMat<T>::nrows() const {
|
|
return nn;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return number of columns of this symmetric matrix of generalt type <code>T</code>
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline int NRSMat<T>::ncols() const {
|
|
return nn;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return number of elements of this symmetric matrix of generalt type <code>T</code>
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline size_t NRSMat<T>::size() const {
|
|
return NN2;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* for this real symmetric matrix \f$A\f$, determine the
|
|
* first element with largest absolute value
|
|
* @return \f$A_{l,m}\f$ which maximizes \f$\left|A_{i,j}\right|\f$
|
|
******************************************************************************/
|
|
template<>
|
|
inline const double NRSMat<double>::amax() const {
|
|
double ret(0.0);
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
ret = v[cblas_idamax(NN2, v, 1) - 1];
|
|
#ifdef CUDALA
|
|
}else{
|
|
const int pozice = cublasIdamax(NN2, v, 1) - 1;
|
|
TEST_CUBLAS("cublasIdamax");//"double NRSMat<double>::amax()"
|
|
|
|
gpuget(1, sizeof(double), v + pozice, &ret);
|
|
}
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for this real symmetric matrix \f$A\f$, determine the
|
|
* first element with smallest absolute value
|
|
* @return \f$A_{l,m}\f$ which minimizes \f$\left|A_{i,j}\right|\f$
|
|
******************************************************************************/
|
|
template<>
|
|
inline const double NRSMat<double>::amin() const {
|
|
double ret(0.0);
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
// idamin seems not to be supported
|
|
double val(0.0);
|
|
int index(-1);
|
|
ret = std::numeric_limits<double>::max();
|
|
for(register size_t i = 0; i < NN2; i++){
|
|
val = std::abs(v[i]);
|
|
if(val < ret){ index = i; ret = val; }
|
|
}
|
|
ret = v[index];
|
|
#ifdef CUDALA
|
|
}else{
|
|
const int pozice = cublasIdamin(nn, v, 1) - 1;
|
|
TEST_CUBLAS("cublasIdamin");//"double NRSMat<double>::amin()"
|
|
gpuget(1, sizeof(double), v + pozice, &ret);
|
|
}
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for this complex symmetric matrix \f$A\f$, determine the
|
|
* first element with largest "absolute value"
|
|
* @return \f$A_{l,m}\f$ which maximizes \f$\left|\Re{}A_{i,j}\right| + \left|\Im{}A_{i,j}\right|\f$
|
|
******************************************************************************/
|
|
template<>
|
|
inline const std::complex<double> NRSMat< std::complex<double> >::amax() const{
|
|
std::complex<double> ret(0., 0.);
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
ret = v[cblas_izamax(NN2, v, 1) - 1];
|
|
#ifdef CUDALA
|
|
}else{
|
|
const int pozice = cublasIzamax(NN2, (cuDoubleComplex*)v, 1) - 1;
|
|
TEST_CUBLAS("cublasIzamax");//"std::complex<double> NRSMat<std::complex<double> >::amax()"
|
|
gpuget(1, sizeof(std::complex<double>), v + pozice, &ret);
|
|
}
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for this complex symmetric matrix \f$A\f$, determine the
|
|
* first element with smallest "absolute value"
|
|
* @return \f$A_{l,m}\f$ which minimizes \f$\left|\Re{}A_{i,j}\right| + \left|\Im{}A_{i,j}\right|\f$
|
|
******************************************************************************/
|
|
template<>
|
|
inline const std::complex<double> NRSMat<std::complex<double> >::amin() const{
|
|
std::complex<double> ret(0., 0.);
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
// izamin seems not to be supported
|
|
int index(0);
|
|
double val(0.0), min_val(0.0);
|
|
std::complex<double> z_val(0.0, 0.0);
|
|
|
|
min_val = std::numeric_limits<double>::max();
|
|
for(register size_t i = 0; i < NN2; i++){
|
|
z_val = v[i];
|
|
val = std::abs(z_val.real()) + std::abs(z_val.imag());
|
|
if(val < min_val){ index = i; min_val = val; }
|
|
}
|
|
ret = v[index];
|
|
#ifdef CUDALA
|
|
}else{
|
|
const int pozice = cublasIzamin(nn, (cuDoubleComplex*)v, 1) - 1;
|
|
TEST_CUBLAS("cublasIzamin");//"std::complex<double> NRSMat<std::complex<double> >::amin()"
|
|
gpuget(1, sizeof(std::complex<double>), v + pozice, &ret);
|
|
}
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return pointer of general type T to the underlying data structure
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T>::operator T*() {
|
|
#ifdef DEBUG
|
|
if(!v) laerror("unallocated NRSMat object in NRSMat<T>::operator T*()");
|
|
#endif
|
|
return v;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return constant pointer of general type T to the underlying data structure
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRSMat<T>::operator const T*() const {
|
|
#ifdef DEBUG
|
|
if(!v) laerror("unallocated NRSMat object in NRSMat<T>::operator const T*()");
|
|
#endif
|
|
return v;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* destructor for general type <code>T</code>
|
|
* @see NRSMat<T>::count, NRSMat<T>::v
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRSMat<T>::~NRSMat() {
|
|
if(!count) return;
|
|
if(--(*count) <= 0) {
|
|
if(v){
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
delete[] v;
|
|
#ifdef CUDALA
|
|
}else{ gpufree(v); }
|
|
#endif
|
|
}
|
|
delete count;
|
|
}
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* assigment operator implementing deep copy of the reference NRSMat<T> object
|
|
* @see NRSMat<T>::operator=, NRSMat<T>::copyonwrite()
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &rhs) {
|
|
#ifdef DEBUG
|
|
if(!rhs.v) laerror("unallocated NRSMat<T> object in NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &)");
|
|
#endif
|
|
if(this == &rhs) return *this;
|
|
*this = rhs;
|
|
this->copyonwrite();
|
|
return *this;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* assignment operator implementing shallow copy of reference NRSMat<T> object
|
|
* @see NRSMat<T>::operator|=, NRSMat<T>::copyonwrite()
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRSMat<T> & NRSMat<T>::operator=(const NRSMat<T> & rhs) {
|
|
if(this == &rhs) return *this;
|
|
if(count)
|
|
if(--(*count) == 0){
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
delete [] v;
|
|
#ifdef CUDALA
|
|
}else{ gpufree(v); }
|
|
#endif
|
|
delete count;
|
|
}
|
|
v = rhs.v;
|
|
nn = rhs.nn;
|
|
count = rhs.count;
|
|
#ifdef CUDALA
|
|
location = rhs.location;
|
|
#endif
|
|
if(count) (*count)++;
|
|
return *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* detach this NRSMat<T> object and create own physical copy of the data
|
|
* @see NRSMat<T>::operator|=, NRSMat<T>::copyonwrite()
|
|
******************************************************************************/
|
|
template <typename T>
|
|
void NRSMat<T>::copyonwrite(bool detachonly, bool deep) {
|
|
if(!count)
|
|
{
|
|
if(nn) laerror("nonempty smat without reference count encountered");
|
|
if(_LA_warn_empty_copyonwrite) std::cout <<"Warning: copyonwrite of empty smat\n";
|
|
return;
|
|
}
|
|
if(*count == 1 && !LA_traits<T>::is_plaindata() && !detachonly && deep) //type-nested copyonwrite
|
|
{
|
|
#ifdef CUDALA
|
|
if(location != cpu) laerror("nested types not supported on gpu memory");
|
|
#endif
|
|
for(int i=0; i<NN2; ++i) LA_traits<T>::copyonwrite(v[i]);
|
|
}
|
|
|
|
if(*count > 1){
|
|
(*count)--;
|
|
count = new int;
|
|
*count = 1;
|
|
T *newv;
|
|
#ifdef CUDALA
|
|
if(location == cpu) {
|
|
#endif
|
|
newv = new T[NN2];
|
|
if(!detachonly)
|
|
{
|
|
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, NN2*sizeof(T));
|
|
else
|
|
{
|
|
for(int i=0; i<NN2; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
|
|
}
|
|
}
|
|
#ifdef CUDALA
|
|
}else{
|
|
f(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
|
|
newv = (T *) gpualloc(NN2*sizeof(T));
|
|
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat<T>::copyonwrite()");
|
|
if(!detachonly) cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
|
TEST_CUBLAS("cublasScopy");//"NRSMat<T>::copyonwrite()"
|
|
}
|
|
#endif
|
|
|
|
v = newv;
|
|
}
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* resize this symmetric matrix of general type <code>T</code>
|
|
* @param[in] n requested number of rows (columns)
|
|
******************************************************************************/
|
|
template <typename T>
|
|
void NRSMat<T>::resize(const int n) {
|
|
#ifdef DEBUG
|
|
if(n < 0) laerror("illegal dimension in NRSMat<T>::resize(const int)");
|
|
#endif
|
|
if(count){
|
|
if(n == 0){
|
|
if(--(*count) <= 0) {
|
|
if(v) {
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
delete[] (v);
|
|
#ifdef CUDALA
|
|
}else{ gpufree(v); }
|
|
#endif
|
|
}
|
|
delete count;
|
|
}
|
|
count = 0;
|
|
nn = 0;
|
|
v = 0;
|
|
return;
|
|
}
|
|
if(*count > 1){
|
|
(*count)--;
|
|
count = 0;
|
|
v = 0;
|
|
nn = 0;
|
|
}
|
|
}
|
|
if(!count){
|
|
count = new int;
|
|
*count = 1;
|
|
nn = n;
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
v = new T[NN2];
|
|
#ifdef CUDALA
|
|
}else{ v = (T*) gpualloc(NN2*sizeof(T)); }
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
if(n != nn){
|
|
nn = n;
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
delete[] v;
|
|
v = new T[NN2];
|
|
#ifdef CUDALA
|
|
}else{
|
|
|
|
gpufree(v);
|
|
v = (T*) gpualloc(NN2*sizeof(T));
|
|
}
|
|
#endif
|
|
|
|
}
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* perform memory transfers between CPU and GPU memory
|
|
* @param[in] dest memory destination
|
|
* @see NRSMat<T>::location, NRSMat<T>::getlocation()
|
|
******************************************************************************/
|
|
#ifdef CUDALA
|
|
template<typename T>
|
|
void NRSMat<T>::moveto(const GPUID dest) {
|
|
if(location == dest) return;
|
|
|
|
CPU_GPU(location,dest);
|
|
location = dest;
|
|
|
|
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
|
if(!count) return;
|
|
|
|
if(v && *count == 0) laerror("internal inconsistency of reference counting 2");
|
|
if(!v) return;
|
|
|
|
T *vold = v;
|
|
|
|
if(dest == cpu){ //moving from GPU to CPU
|
|
v = new T[NN2];
|
|
gpuget(NN2, sizeof(T), vold, v);
|
|
if(*count == 1) gpufree(vold);
|
|
else {--(*count); count = new int(1);}
|
|
|
|
}else{ //moving from CPU to GPU
|
|
|
|
v = (T *) gpualloc(NN2*sizeof(T));
|
|
gpuput(NN2, sizeof(T), vold, v);
|
|
if(*count == 1) delete[] vold;
|
|
else {--(*count); count = new int(1);}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
/***************************************************************************//**
|
|
* complexify a given matrix \f$A\f$ of general type <code>T</code>
|
|
* @param[in] rhs matrix \f$A\f$ intended for this operation
|
|
* @return matrix \f$B\f$ where \f$\Re B=A\f$ and \f$\Im B = 0\f$
|
|
******************************************************************************/
|
|
template<typename T>
|
|
NRSMat<std::complex<T> > complexify(const NRSMat<T> &rhs) {
|
|
NOT_GPU(rhs);
|
|
|
|
NRSMat<std::complex<T> > r(rhs.nrows());
|
|
for(register int i = 0; i<rhs.nrows(); ++i)
|
|
for(register int j = 0; j<=i; ++j) r(i,j) = rhs(i,j);
|
|
return r;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* complexify a given double-precision real matrix \f$A\f$
|
|
* @param[in] rhs matrix \f$A\f$ intended for this operation
|
|
* @return matrix \f$B\f$ where \f$\Re B=A\f$ and \f$\Im B = 0\f$
|
|
******************************************************************************/
|
|
/*
|
|
template<>
|
|
NRSMat<std::complex<double> > complexify(const NRSMat<double> &rhs) {
|
|
NRSMat<std::complex<double> > r(rhs.nrows(), rhs.getlocation());
|
|
#ifdef CUDALA
|
|
if(rhs.getlocation() == cpu){
|
|
#endif
|
|
cblas_dcopy(rhs.size(), &(rhs[0]), 1, (double*)(&(r[0])), 2);
|
|
#ifdef CUDALA
|
|
}else{
|
|
cublasDcopy(rhs.size(), &(rhs[0]), 1, (double*)(&(r[0])), 2);
|
|
TEST_CUBLAS("cublasDcopy");//"NRSMat<std::complex<double> > complexify(const NRSMat<double> &)"
|
|
}
|
|
#endif
|
|
return r;
|
|
}
|
|
*/
|
|
/***************************************************************************//**
|
|
* output operator
|
|
* @param[in,out] s output stream
|
|
* @param[in] x NRSMat<T> matrix to be printed out
|
|
* @return modified stream
|
|
******************************************************************************/
|
|
template <typename T>
|
|
std::ostream& operator<<(std::ostream &s, const NRSMat<T> &x) {
|
|
#ifdef CUDALA
|
|
if(x.getlocation() == cpu){
|
|
#endif
|
|
int i,j,n;
|
|
n = x.nrows();
|
|
s << n << ' ' << n << '\n';
|
|
for(i = 0;i<n;i++){
|
|
for(j = 0; j<n;j++) s << (typename LA_traits_io<T>::IOtype)x(i,j) << (j == n-1 ? '\n' : ' ');
|
|
}
|
|
return s;
|
|
#ifdef CUDALA
|
|
}else{
|
|
NRSMat<T> tmp = x;
|
|
tmp.moveto(cpu);
|
|
return s<<tmp;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* input operator
|
|
* @param[in,out] s input stream
|
|
* @param[in] x NRSMat<T> matrix for storing the input
|
|
* @return modified stream
|
|
******************************************************************************/
|
|
template <typename T>
|
|
std::istream& operator>>(std::istream &s, NRSMat<T> &x) {
|
|
#ifdef CUDALA
|
|
if(x.getlocation() == cpu){
|
|
#endif
|
|
int i,j,n,m;
|
|
s >> n >> m;
|
|
if(n!=m) laerror("input symmetric matrix not square");
|
|
x.resize(n);
|
|
typename LA_traits_io<T>::IOtype tmp;
|
|
for(i = 0;i<n;i++) for(j = 0; j<m;j++) {s>>tmp; x(i,j)=tmp;}
|
|
return s;
|
|
#ifdef CUDALA
|
|
}else{
|
|
NRSMat<T> tmp;
|
|
tmp.moveto(cpu);
|
|
s >> tmp;
|
|
tmp.moveto(x.getlocation());
|
|
x = tmp;
|
|
return s;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
//convert whole matrix between types
|
|
template <typename T, typename S>
|
|
void NRSMat_convert(NRSMat<T> &a, const NRSMat<S> &b)
|
|
{
|
|
a.resize(b.nrows(),b.ncols());
|
|
for(int i=0; i<b.nrows(); ++i)
|
|
for(int j=0; j<=i; ++j)
|
|
a(i,j) = (T) b(i,j);
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* generate operators relating NRSMat<T> objects and scalars
|
|
* corresponding macro is defined in vec.h
|
|
******************************************************************************/
|
|
NRVECMAT_OPER(SMat,+)
|
|
NRVECMAT_OPER(SMat,-)
|
|
NRVECMAT_OPER(SMat,*)
|
|
|
|
/***************************************************************************//**
|
|
* generate operators relating in general two NRSMat<T> objects
|
|
* corresponding macro is defined in vec.h
|
|
******************************************************************************/
|
|
NRVECMAT_OPER2(SMat,+)
|
|
NRVECMAT_OPER2(SMat,-)
|
|
|
|
/***************************************************************************//**
|
|
* class implementing NRSMat<T> funcitonality with indices running from 1
|
|
* allmost all function members are inherited, only constructors are given explicitly
|
|
******************************************************************************/
|
|
template<typename T>
|
|
class NRSMat_from1 : public NRSMat<T> {
|
|
public:
|
|
NRSMat_from1(): NRSMat<T>() {};
|
|
explicit NRSMat_from1(const int n): NRSMat<T>(n) {};
|
|
NRSMat_from1(const NRSMat<T> &rhs): NRSMat<T>(rhs) {}; //be able to convert the parent class transparently to this
|
|
NRSMat_from1(const T &a, const int n): NRSMat<T>(a,n) {};
|
|
NRSMat_from1(const T *a, const int n): NRSMat<T>(a,n) {};
|
|
explicit NRSMat_from1(const NRMat<T> &rhs): NRSMat<T>(rhs) {};
|
|
explicit NRSMat_from1(const NRVec<T> &rhs, const int n): NRSMat<T>(rhs,n) {};
|
|
|
|
inline const T& operator() (const int i, const int j) const {
|
|
#ifdef DEBUG
|
|
if(i<=0||j<=0||i>NRSMat<T>::nn||j>NRSMat<T>::nn) laerror("index in const T& NRSMat<T>::operator() (const int, const int) out of range");
|
|
#endif
|
|
return NRSMat<T>::v[SMat_index_1(i,j)];
|
|
}
|
|
|
|
inline T& operator() (const int i, const int j){
|
|
#ifdef DEBUG
|
|
if(i<=0||j<=0||i>NRSMat<T>::nn||j>NRSMat<T>::nn) laerror("index in T& NRSMat<T>::operator() (const int, const int) out of range");
|
|
#endif
|
|
return NRSMat<T>::v[SMat_index_1(i,j)];
|
|
}
|
|
};
|
|
|
|
}//namespace
|
|
#endif /* _LA_SMAT_H_ */
|