LA_library/smat.h

1292 lines
45 KiB
C++

//------------------------------------------------------------------------------
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
//------------------------------------------------------------------------------
/*
LA: linear algebra C++ interface library
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
complex versions written by Roman Curik <roman.curik@jh-inst.cas.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _LA_SMAT_H_
#define _LA_SMAT_H_
#include "la_traits.h"
namespace LA {
#define NN2 ((size_t)nn*(nn+1)/2)
//forward declaration
template<typename T> class NRPerm;
template<typename T> class NRSMat_from1;
/***************************************************************************//**
* This class implements a general symmetric or hermitian matrix the elements
* of which are stored in packed form. Particularly the lower triangular part
* of a symmetric or hermitian matrix of order \f$N\f$ is interpreted as a
* vector of length \f$N(N+1)/2\f$ in row-major storage scheme.
******************************************************************************/
template <class T>
class NRSMat{
protected:
int nn;//!< number of rows/columns of this symmetric matrix
T *v;//!< internal pointer to the underlying data structure
int *count;//!< pointer to the reference counter
#ifdef CUDALA
GPUID location;//!< specification of memory (GPU/CPU) location where this objects resides
#endif
public:
friend class NRVec<T>;
friend class NRMat<T>;
~NRSMat();
//! default constructor of null-matrix
inline NRSMat() : nn(0),v(0),count(0) {
#ifdef CUDALA
location = DEFAULT_LOC;
#endif
};
//! default constructor
inline explicit NRSMat(const int n, const GPUID loc = undefined);
//! constructor initializing the matrix being created by given scalar value
inline NRSMat(const T &a, const int n);
//! constructor initializing the matrix being created by data located at given memory position
inline NRSMat(const T *a, const int n);
//! copy constructor
inline NRSMat(const NRSMat &rhs);
//! constructor converting real matrix to its complex counterpart
NRSMat(const typename LA_traits_complex<T>::NRSMat_Noncomplex_type &rhs, bool imagpart = false);
//! constructor creating symmetric part of a general matrix
explicit NRSMat(const NRMat<T> &rhs);
//! construct symmetric matrix by filling the lower triangle with data stored in a vector
explicit NRSMat(const NRVec<T> &rhs, const int n);
//! assignment operator performing shallow copy
NRSMat & operator=(const NRSMat &rhs);
//! assignment operator performing deep copy
NRSMat & operator|=(const NRSMat &rhs);
//! fill the matrix with pseudorandom numbers (uniform distribution)
void randomize(const typename LA_traits<T>::normtype &x);
//! assign scalar value to diagonal elements
NRSMat & operator=(const T &a);
//! unit matrix for general power
void identity() {*this = (T)1;}
//! inverse matrix
NRSMat inverse();
//! permute matrix elements
const NRSMat permuted(const NRPerm<int> &p, const bool inverse=false) const;
inline int getcount() const {return count?*count:0;}
#ifdef CUDALA
inline GPUID getlocation() const {return location;}
void moveto(const GPUID dest);
#else
inline GPUID getlocation() const {return cpu;}
void moveto(const GPUID dest) {};
#endif
//! relational operator for testing nonequality
const bool operator!=(const NRSMat &rhs) const {if(nn!=rhs.nn) return 1; return LA_traits<T>::gencmp(v,rhs.v,NN2);};
//! relational operator for testing equality
const bool operator==(const NRSMat &rhs) const {return !(*this != rhs);};
inline NRSMat & operator*=(const T &a);
inline NRSMat & operator+=(const T &a);
inline NRSMat & operator-=(const T &a);
inline NRSMat & operator+=(const NRSMat &rhs);
inline NRSMat & operator-=(const NRSMat &rhs);
const NRSMat operator-() const;
inline const NRSMat operator*(const T &a) const;
inline const NRSMat operator+(const T &a) const;
inline const NRSMat operator-(const T &a) const;
inline const NRSMat operator+(const NRSMat &rhs) const;
inline const NRSMat operator-(const NRSMat &rhs) const;
inline const NRMat<T> operator+(const NRMat<T> &rhs) const;
inline const NRMat<T> operator-(const NRMat<T> &rhs) const;
const NRMat<T> operator*(const NRSMat &rhs) const;
const NRMat<T> operator*(const NRMat<T> &rhs) const;
const T dot(const NRSMat &rhs) const;
const T dot(const NRVec<T> &rhs) const;
const NRVec<T> operator*(const NRVec<T> &rhs) const {NRVec<T> result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
const NRVec<std::complex<T> > operator*(const NRVec<std::complex<T> > &rhs) const {NRVec<std::complex<T> > result(nn,rhs.getlocation()); result.gemv((T)0,*this,'n',(T)1,rhs); return result;};
const T* diagonalof(NRVec<T> &, const bool divide = 0, bool cache = false) const;
void gemv(const T beta, NRVec<T> &r, const char trans, const T alpha, const NRVec<T> &x) const {r.gemv(beta,*this,trans,alpha,x);};
void gemv(const T beta, NRVec<std::complex<T> > &r, const char trans, const T alpha, const NRVec<std::complex<T> > &x) const {r.gemv(beta,*this,trans,alpha,x);};
inline const T& operator[](const size_t ij) const;
inline T& operator[](const size_t ij);
inline const T& operator()(const int i, const int j) const;
inline T& operator()(const int i, const int j);
inline int nrows() const;
inline int ncols() const;
inline size_t size() const; //NOTE it is the size of data n*(n+1)/2, not nrows
inline bool transp(const int i, const int j) const {return i>j;} //this can be used for compact storage of matrices, which are actually not symmetric, but one triangle of them is redundant
const typename LA_traits<T>::normtype norm(const T scalar = (T)0) const;
void axpy(const T alpha, const NRSMat &x); // this+= a*x
inline const T amax() const;
inline const T amin() const;
const T trace() const;
void get(int fd, bool dimensions = 1, bool transp = 0);
void put(int fd, bool dimensions = 1, bool transp = 0) const;
void copyonwrite(bool detachonly=false, bool deep=true);
void clear() {copyonwrite(LA_traits<T>::is_plaindata()); LA_traits<T>::clear(v,NN2);}; //zero out
void resize(const int n);
void dealloc(void) {resize(0);}
//! extract specified submatrix
const NRSMat submatrix(const int from, const int to) const;
const NRSMat submatrix(const NRVec<int> &selection) const;
//! store given matrix at given position into the current matrix
void storesubmatrix(const int from, const NRSMat &rhs);
void storesubmatrix(const NRVec<int> &selection, const NRSMat &rhs);
inline operator T*();
inline operator const T*() const;
void fprintf(FILE *f, const char *format, const int modulo) const;
void fscanf(FILE *f, const char *format);
//members concerning sparse matrix
explicit NRSMat(const SparseMat<T> &rhs); // dense from sparse
explicit NRSMat(const SparseSMat<T> &rhs); // dense from sparse
inline void simplify() {}; //just for compatibility with sparse ones
bool issymmetric() const {return 1;}
void swap(NRSMat &rhs) //more efficient swap than via tmp and constructors and operator=
{
int tmpnn=nn; nn=rhs.nn; rhs.nn=tmpnn;
T *tmpv=v; v=rhs.v; rhs.v=tmpv;
int *tmpcount=count; count=rhs.count; rhs.count=tmpcount;
#ifdef CUDALA
GPUID tmplocation=location; location=rhs.location; rhs.location=tmplocation;
#endif
}
NRSMat<typename LA_traits<T>::normtype> diffabs(const NRSMat &rhs) const; //difference of absolute values
NRSMat<typename LA_traits<T>::normtype> abs() const; //elementwise absolute values
};
}//namespace
//due to mutual includes this has to be after full class declaration
#include "vec.h"
#include "mat.h"
#include "permutation.h"
namespace LA {
/***************************************************************************//**
* constructor of a symmetric matrix stored in packed form
* @param[in] n number of rows of the matrix being created
* @param[in] loc location for storing the matrix
* @see count, v, location
******************************************************************************/
template <typename T>
inline NRSMat<T>::NRSMat(const int n, const GPUID loc): nn(n), count(new int(1)) {
#ifdef CUDALA
location = (loc == undefined?DEFAULT_LOC:loc);
if(location == cpu){
#endif
v = new T[NN2];
#ifdef CUDALA
}else{
v = (T*) gpualloc(NN2*sizeof(T));
}
#endif
}
/***************************************************************************//**
* constructor of a symmetric matrix stored in packed form (default location in used)
* @param[in] a set all matrix elements equal to this value
* @param[in] n number of rows of the matrix being created
* @see count, v, location, NRSMat<T>::NRSMat(const int, const GPUID)
******************************************************************************/
template <typename T>
inline NRSMat<T>::NRSMat(const T& a, const int n) : nn(n), count(new int(1)) {
#ifdef CUDALA
location = DEFAULT_LOC;
if(location == cpu){
#endif
v = new T[NN2];
if(!LA_traits<T>::is_plaindata() || a != (T)0) for(register size_t i = 0; i<NN2; i++) v[i] = a;
else memset(v, 0, NN2*sizeof(T));
#ifdef CUDALA
}else{
v = (T*) gpualloc(NN2*sizeof(T));
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
cublasSetVector(NN2, sizeof(T), &a, 0, v, 1);
}
#endif
}
/***************************************************************************//**
* constructor of a symmetric matrix stored in packed form (default location in used)
* @param[in] a pointer to data of type T used for matrix inicialization
* @param[in] n number of rows of the matrix being created
* @see count, v, location, NRSMat<T>::NRSMat(const int, const GPUID), NRSMat<T>::NRSMat(const T&, const int)
******************************************************************************/
template <typename T>
inline NRSMat<T>::NRSMat(const T *a, const int n) : nn(n), count(new int(1)) {
#ifdef CUDALA
location = DEFAULT_LOC;
if(location == cpu){
#endif
if(LA_traits<T>::is_plaindata()) memcpy(v, a, NN2*sizeof(T));
else for( int i=0; i<NN2; i++) v[i] = a[i];
#ifdef CUDALA
}else{
v = (T*) gpualloc(NN2*sizeof(T));
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
cublasSetVector(NN2, sizeof(T), a, 1, v, 1);
}
#endif
}
/***************************************************************************//**
* copy constructor implementing shallow copy
* @param[in] rhs reference matrix being copied
* @see count, v, location
******************************************************************************/
template <typename T>
inline NRSMat<T>::NRSMat(const NRSMat<T> &rhs) {
#ifdef CUDALA
location = rhs.location;
#endif
v = rhs.v;
nn = rhs.nn;
count = rhs.count;
if(count) (*count)++;
}
/***************************************************************************//**
* constructor interpreting a vector of \f$n(n+1)/2\f$ elements as a symmetric
* matrix stored in packed form having \f$n\f$ rows
* @param[in] rhs reference matrix being copied
* @param[in] n count of rows of the matrix being created
******************************************************************************/
template <typename T>
NRSMat<T>::NRSMat(const NRVec<T> &rhs, const int n) {
#ifdef CUDALA
location = rhs.location;
#endif
nn = n;
#ifdef DEBUG
if(NN2 != rhs.size()){ laerror("incompatible dimensions in NRSMat<T>::NRSMat(const NRVec<T>&, const int)"); }
#endif
count = rhs.count;
v = rhs.v;
(*count)++;
}
/***************************************************************************//**
* multiply this real symmetric matrix with real scalar value
* @param[in] a real multiplicative factor
* @return reference to the modified matrix
******************************************************************************/
template<>
inline NRSMat<double> & NRSMat<double>::operator*=(const double &a) {
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_dscal(NN2, a, v, 1);
#ifdef CUDALA
}else{
cublasDscal(NN2, a, v, 1);
TEST_CUBLAS("cublasDscal");//"NRSMat<double>& NRSMat<double>::operator*=(const double &)"
}
#endif
return *this;
}
/***************************************************************************//**
* multiply this complex symmetric matrix with complex scalar value
* @param[in] a complex multiplicative factor
* @return reference to the modified matrix
******************************************************************************/
template<>
inline NRSMat<std::complex<double> > &
NRSMat<std::complex<double> >::operator*=(const std::complex<double> &a) {
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zscal(NN2, &a, v, 1);
#ifdef CUDALA
}else{
const cuDoubleComplex _a = make_cuDoubleComplex(a.real(), a.imag());
cublasZscal(NN2, _a, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZscal");//"NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator*=(const std::complex<double> &)"
}
#endif
return *this;
}
/***************************************************************************//**
* multiply this symmetric matrix of general type <code>T</code> stored in packed form
* with scalar value of type <code>T</code>
* @param[in] a multiplicative factor of type <code>T</code>
* @return reference to the modified matrix
******************************************************************************/
template <typename T>
inline NRSMat<T> & NRSMat<T>::operator*=(const T &a) {
NOT_GPU(*this);
copyonwrite();
for(register size_t i = 0; i<NN2; ++i) v[i] *= a;
return *this;
}
/***************************************************************************//**
* add a scalar value \f$\alpha\f$ of general type <code>T</code> to the diagonal elements of this symmetric matrix of type <code>T</code>
* @param[in] a scalar value \f$\alpha\f$
* @return reference to the modified matrix
******************************************************************************/
template <typename T>
inline NRSMat<T> & NRSMat<T>::operator+=(const T &a) {
NOT_GPU(*this);
copyonwrite();
for(register int i = 0; i<nn; i++) v[i*(size_t)(i+1)/2 + i] += a;
return *this;
}
/***************************************************************************//**
* subtract a scalar value \f$\alpha\f$ of general type <code>T</code> from the
* diagonal elements of this symmetric matrix of type <code>T</code>
* @param[in] a scalar value \f$\alpha\f$
* @return reference to the modified matrix
******************************************************************************/
template <typename T>
inline NRSMat<T> & NRSMat<T>::operator-=(const T &a) {
NOT_GPU(*this);
copyonwrite();
for(register int i = 0; i<nn; i++) v[i*(size_t)(i+1)/2+i] -= a;
return *this;
}
/*difference of absolute values*/
template <typename T>
NRSMat<typename LA_traits<T>::normtype> NRSMat<T>::diffabs(const NRSMat<T> &rhs) const {
#ifdef DEBUG
if (nn != rhs.nn) laerror("incompatible dimensions");
#endif
NOT_GPU(*this);
NOT_GPU(rhs);
NRSMat<typename LA_traits<T>::normtype> r(nn);
for(int i=0; i<nn; ++i) for(int j=0; j<=i; ++j) r(i,j) = MYABS((*this)(i,j)) - MYABS(rhs(i,j));
return r;
}
/*elementwise absolute values*/
template <typename T>
NRSMat<typename LA_traits<T>::normtype> NRSMat<T>::abs() const {
NOT_GPU(*this);
NRSMat<typename LA_traits<T>::normtype> r(nn);
for(int i=0; i<nn; ++i) for(int j=0; j<=i; ++j) r(i,j) = MYABS((*this)(i,j));
return r;
}
/***************************************************************************//**
* add up this real symmetric matrix with given symmetric matrix
* @param[in] rhs real symmetric matrix to be added
* @return reference to the modified matrix
******************************************************************************/
template<>
inline NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> & rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> &)");
#endif
SAME_LOC(*this, rhs);
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_daxpy(NN2, 1.0, rhs.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasDaxpy(NN2, 1.0, rhs.v, 1, v, 1);
TEST_CUBLAS("cublasDaxpy");//" NRSMat<double>& NRSMat<double>::operator+=(const NRSMat<double> &)"
}
#endif
return *this;
}
/***************************************************************************//**
* add up this complex symmetric matrix with given symmetric matrix
* @param[in] rhs complex symmetric matrix to be added
* @return reference to the modified matrix
******************************************************************************/
template<>
inline NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator+=(const NRSMat<std::complex<double> > & rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator+=(const NRSMat<std::complex<double> > &)");
#endif
SAME_LOC(*this, rhs);
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zaxpy(NN2, &CONE, rhs.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasZaxpy(NN2, CUONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZaxpy");//"NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator+=(const NRSMat<std::complex<double> > &)"
}
#endif
return *this;
}
/***************************************************************************//**
* add up this symmetric matrix of general type <code>T</code> with given symmetric matrix
* @param[in] rhs complex matrix of general type <code>T</code> to be added
* @return reference to the modified matrix
******************************************************************************/
template <typename T>
inline NRSMat<T>& NRSMat<T>::operator+=(const NRSMat<T>& rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible NRSMat<T>& NRSMat<T>::operator+=(const NRSMat<T> &)");
#endif
NOT_GPU(*this);
SAME_LOC(*this, rhs);
copyonwrite();
for(register size_t i = 0; i<NN2; ++i) v[i] += rhs.v[i];
return *this;
}
/***************************************************************************//**
* subtracts given real symmetric matrix from this real symmetric matrix
* @param[in] rhs real symmetric matrix to be subtracted
* @return reference to the modified matrix
******************************************************************************/
template<>
inline NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double>& rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double> &)");
#endif
SAME_LOC(*this, rhs);
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_daxpy(NN2, -1.0, rhs.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasDaxpy(NN2, -1.0, rhs.v, 1, v, 1);
TEST_CUBLAS("cublasDaxpy");//" NRSMat<double>& NRSMat<double>::operator-=(const NRSMat<double> &)"
}
#endif
return *this;
}
/***************************************************************************//**
* subtracts given complex symmetric matrix from this complex symmetric matrix
* @param[in] rhs complex symmetric matrix to be subtracted
* @return reference to the modified matrix
******************************************************************************/
template<>
inline NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator-=(const NRSMat<std::complex<double> >& rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible dimensions in NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator-=(const NRSMat<std::complex<double> > &)");
#endif
SAME_LOC(*this, rhs);
copyonwrite();
#ifdef CUDALA
if(location == cpu){
#endif
cblas_zaxpy(NN2, &CMONE, rhs.v, 1, v, 1);
#ifdef CUDALA
}else{
cublasZaxpy(NN2, CUMONE, (cuDoubleComplex*)(rhs.v), 1, (cuDoubleComplex*)v, 1);
TEST_CUBLAS("cublasZaxpy");//"NRSMat<std::complex<double> >& NRSMat<std::complex<double> >::operator-=(const NRSMat<std::complex<double> > &)"
}
#endif
return *this;
}
/***************************************************************************//**
* subtracts given symmetric matrix of general type <code>T</code> from this symmetric matrix of type <code>T</code>
* @param[in] rhs symmetric matrix of general type <code>T</code> to be subtracted
* @return reference to the modified matrix
******************************************************************************/
template <typename T>
inline NRSMat<T>& NRSMat<T>::operator-=(const NRSMat<T>& rhs) {
#ifdef DEBUG
if(nn != rhs.nn) laerror("incompatible NRSMat<T>& NRSMat<T>::operator-=(const NRSMat<T> &)");
#endif
NOT_GPU(*this);
copyonwrite();
for(register size_t i = 0; i<NN2; ++i) v[i] -= rhs.v[i];
return *this;
}
/***************************************************************************//**
* add up given dense matrix of general type <code>T</code> with this symmetric matrix of type <code>T</code>
* @param[in] rhs dense matrix of type <code>T</code> to be added
* @return reference to the modified matrix
******************************************************************************/
template <typename T>
inline const NRMat<T> NRSMat<T>::operator+(const NRMat<T> &rhs) const {
return NRMat<T>(rhs) += *this;
}
/***************************************************************************//**
* subtracts given dense matrix of general type <code>T</code> from this symmetric matrix of type <code>T</code>
* @param[in] rhs dense matrix of type <code>T</code> to be added
* @return reference to the modified matrix
******************************************************************************/
template <typename T>
inline const NRMat<T> NRSMat<T>::operator-(const NRMat<T> &rhs) const {
return NRMat<T>(-rhs) += *this;
}
/***************************************************************************//**
* determine matrix element of this symmetric matrix of general type <code>T</code>
* using cumulative index increasing in a row-major way and corresponding to the
* lower triangular part of the respective dense matrix
* @param[in] ij index of the requested element
* @return reference to the corresponding matrix element
******************************************************************************/
template <typename T>
inline T& NRSMat<T>::operator[](const size_t ij) {
#ifdef DEBUG
if(_LA_count_check && *count != 1) laerror("T& NRSMat<T>::operator[] used for matrix with count>1");
if(ij<0 || ij>=NN2) laerror("T& NRSMat<T>::operator[] out of range");
if(!v) laerror("T& NRSMat<T>::operator[] used for unallocated NRSmat<T> object");
#endif
NOT_GPU(*this);
return v[ij];
}
/***************************************************************************//**
* determine matrix element of this symmetric matrix of general type <code>T</code>
* using cumulative index increasing in a row-major way and corresponding to the
* lower triangular part of the respective dense matrix, i.e. \f$A_{i,j}\f$ for
* \f$N>i\geq{}j\geq0\f$ corresponds to cumulative index \f$i(i+1)/2+j\f$
* @param[in] ij index of the requested element
* @return constant reference to the corresponding matrix element
******************************************************************************/
template <typename T>
inline const T & NRSMat<T>::operator[](const size_t ij) const {
#ifdef DEBUG
if(ij<0 || ij>=NN2) laerror("T& NRSMat<T>::operator[] out of range");
if(!v) laerror("T& NRSMat<T>::operator[] used for unallocated NRSmat<T> object");
#endif
NOT_GPU(*this);
return v[ij];
}
/***************************************************************************//**
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
* where \f$0\leq{}i,j<N\f$
* @param[in] i row index
* @param[in] i column index
* @return cumulative index
******************************************************************************/
template<typename T>
inline size_t SMat_index(T i, T j) {
return (i>=j) ? i*(size_t)(i+1)/2+j : j*(size_t)(j+1)/2+i;
}
/***************************************************************************//**
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
* where \f$0\leq{}i,j<N\f$ for special case \f$i\geq{}j\f$
* @param[in] i row index
* @param[in] i column index
* @return cumulative index
******************************************************************************/
template<typename T>
inline size_t SMat_index_igej(T i, T j) {
return i*(size_t)(i+1)/2+j;
}
/***************************************************************************//**
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
* where \f$0\leq{}i,j<N\f$ for special case \f$i\leq{}j\f$
* @param[in] i row index
* @param[in] i column index
* @return cumulative index
******************************************************************************/
template<typename T>
inline size_t SMat_index_ilej(T i, T j) {
return j*(size_t)(j+1)/2+i;
}
/***************************************************************************//**
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
* where \f$1\leq{}i,j\leq{}N\f$
* @param[in] i row index
* @param[in] i column index
* @return cumulative index
******************************************************************************/
template<typename T>
inline size_t SMat_index_1(T i, T j) {
return (i>=j)? i*(size_t)(i-1)/2+j-1 : j*(size_t)(j-1)/2+i-1;
}
/***************************************************************************//**
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
* where \f$1\leq{}i,j\leq{}N\f$ for special case \f$i\geq{}j\f$
* @param[in] i row index
* @param[in] i column index
* @return cumulative index
******************************************************************************/
template<typename T>
inline size_t SMat_index_1igej(T i, T j) {
return i*(size_t)(i-1)/2+j-1;
}
/***************************************************************************//**
* determine the cumulative index or matrix element in row \f$i\f$ and column \f$j\f$
* where \f$1\leq{}i,j\leq{}N\f$ for special case \f$i\leq{}j\f$
* @param[in] i row index
* @param[in] i column index
* @return cumulative index
******************************************************************************/
template<typename T>
inline size_t SMat_index_1ilej(T i, T j) {
return j*(size_t)(j-1)/2+i-1;
}
//indexing for antisymmetric matrix (used by fourindex)
template<typename T>
inline size_t ASMat_index(T i, T j)
{
if(i == j) return -1;
return (i>j) ? i*(size_t)(i-1)/2+j : j*(size_t)(j-1)/2+i;
}
template<typename T>
inline size_t ASMat_index_1(T i, T j)
{
if(i == j) return -1;
return (i>j)? (i-2)*(i-1)/2+j-1 : (j-2)*(j-1)/2+i-1;
}
/***************************************************************************//**
* determine matrix element of this symmetric matrix of general type <code>T</code>
* @param[in] i row index running from 0
* @param[in] j column index running from 0
* @return reference to the corresponding matrix element
* @see count, SMat_index, NRSMat<T>::operator[]
******************************************************************************/
template <typename T>
inline T & NRSMat<T>::operator()(const int i, const int j) {
#ifdef DEBUG
if(_LA_count_check && *count != 1) laerror("T & NRSMat<T>::operator()(const int, const int) used for matrix with count > 1");
if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat<T>::operator()(const int, const int) out of range");
if(!v) laerror("T & NRSMat<T>::operator()(const int, const int) used for unallocated NRSmat<T> object");
#endif
NOT_GPU(*this);
return v[SMat_index(i,j)];
}
/***************************************************************************//**
* determine matrix element of this symmetric matrix of general type <code>T</code>
* @param[in] i row index running from 0
* @param[in] j column index running from 0
* @return constant reference to the corresponding matrix element
* @see count, SMat_index, NRSMat<T>::operator[]
******************************************************************************/
template <typename T>
inline const T & NRSMat<T>::operator()(const int i, const int j) const {
#ifdef DEBUG
if(i<0 || i>=nn || j<0 || j>=nn) laerror("T & NRSMat<T>::operator()(const int, const int) out of range");
if(!v) laerror("T & NRSMat<T>::operator()(const int, const int) used for unallocated NRSmat<T> object");
#endif
NOT_GPU(*this);
return v[SMat_index(i,j)];
}
/***************************************************************************//**
* @return number of rows of this symmetric matrix of generalt type <code>T</code>
******************************************************************************/
template <typename T>
inline int NRSMat<T>::nrows() const {
return nn;
}
/***************************************************************************//**
* @return number of columns of this symmetric matrix of generalt type <code>T</code>
******************************************************************************/
template <typename T>
inline int NRSMat<T>::ncols() const {
return nn;
}
/***************************************************************************//**
* @return number of elements of this symmetric matrix of generalt type <code>T</code>
******************************************************************************/
template <typename T>
inline size_t NRSMat<T>::size() const {
return NN2;
}
/***************************************************************************//**
* for this real symmetric matrix \f$A\f$, determine the
* first element with largest absolute value
* @return \f$A_{l,m}\f$ which maximizes \f$\left|A_{i,j}\right|\f$
******************************************************************************/
template<>
inline const double NRSMat<double>::amax() const {
double ret(0.0);
#ifdef CUDALA
if(location == cpu){
#endif
ret = v[cblas_idamax(NN2, v, 1) - 1];
#ifdef CUDALA
}else{
const int pozice = cublasIdamax(NN2, v, 1) - 1;
TEST_CUBLAS("cublasIdamax");//"double NRSMat<double>::amax()"
gpuget(1, sizeof(double), v + pozice, &ret);
}
#endif
return ret;
}
/***************************************************************************//**
* for this real symmetric matrix \f$A\f$, determine the
* first element with smallest absolute value
* @return \f$A_{l,m}\f$ which minimizes \f$\left|A_{i,j}\right|\f$
******************************************************************************/
template<>
inline const double NRSMat<double>::amin() const {
double ret(0.0);
#ifdef CUDALA
if(location == cpu){
#endif
// idamin seems not to be supported
double val(0.0);
int index(-1);
ret = std::numeric_limits<double>::max();
for(register size_t i = 0; i < NN2; i++){
val = std::abs(v[i]);
if(val < ret){ index = i; ret = val; }
}
ret = v[index];
#ifdef CUDALA
}else{
const int pozice = cublasIdamin(nn, v, 1) - 1;
TEST_CUBLAS("cublasIdamin");//"double NRSMat<double>::amin()"
gpuget(1, sizeof(double), v + pozice, &ret);
}
#endif
return ret;
}
/***************************************************************************//**
* for this complex symmetric matrix \f$A\f$, determine the
* first element with largest "absolute value"
* @return \f$A_{l,m}\f$ which maximizes \f$\left|\Re{}A_{i,j}\right| + \left|\Im{}A_{i,j}\right|\f$
******************************************************************************/
template<>
inline const std::complex<double> NRSMat< std::complex<double> >::amax() const{
std::complex<double> ret(0., 0.);
#ifdef CUDALA
if(location == cpu){
#endif
ret = v[cblas_izamax(NN2, v, 1) - 1];
#ifdef CUDALA
}else{
const int pozice = cublasIzamax(NN2, (cuDoubleComplex*)v, 1) - 1;
TEST_CUBLAS("cublasIzamax");//"std::complex<double> NRSMat<std::complex<double> >::amax()"
gpuget(1, sizeof(std::complex<double>), v + pozice, &ret);
}
#endif
return ret;
}
/***************************************************************************//**
* for this complex symmetric matrix \f$A\f$, determine the
* first element with smallest "absolute value"
* @return \f$A_{l,m}\f$ which minimizes \f$\left|\Re{}A_{i,j}\right| + \left|\Im{}A_{i,j}\right|\f$
******************************************************************************/
template<>
inline const std::complex<double> NRSMat<std::complex<double> >::amin() const{
std::complex<double> ret(0., 0.);
#ifdef CUDALA
if(location == cpu){
#endif
// izamin seems not to be supported
int index(0);
double val(0.0), min_val(0.0);
std::complex<double> z_val(0.0, 0.0);
min_val = std::numeric_limits<double>::max();
for(register size_t i = 0; i < NN2; i++){
z_val = v[i];
val = std::abs(z_val.real()) + std::abs(z_val.imag());
if(val < min_val){ index = i; min_val = val; }
}
ret = v[index];
#ifdef CUDALA
}else{
const int pozice = cublasIzamin(nn, (cuDoubleComplex*)v, 1) - 1;
TEST_CUBLAS("cublasIzamin");//"std::complex<double> NRSMat<std::complex<double> >::amin()"
gpuget(1, sizeof(std::complex<double>), v + pozice, &ret);
}
#endif
return ret;
}
/***************************************************************************//**
* @return pointer of general type T to the underlying data structure
******************************************************************************/
template <typename T>
inline NRSMat<T>::operator T*() {
#ifdef DEBUG
if(!v) laerror("unallocated NRSMat object in NRSMat<T>::operator T*()");
#endif
return v;
}
/***************************************************************************//**
* @return constant pointer of general type T to the underlying data structure
******************************************************************************/
template <typename T>
inline NRSMat<T>::operator const T*() const {
#ifdef DEBUG
if(!v) laerror("unallocated NRSMat object in NRSMat<T>::operator const T*()");
#endif
return v;
}
/***************************************************************************//**
* destructor for general type <code>T</code>
* @see NRSMat<T>::count, NRSMat<T>::v
******************************************************************************/
template <typename T>
NRSMat<T>::~NRSMat() {
if(!count) return;
if(--(*count) <= 0) {
if(v){
#ifdef CUDALA
if(location == cpu){
#endif
delete[] v;
#ifdef CUDALA
}else{ gpufree(v); }
#endif
}
delete count;
}
}
/***************************************************************************//**
* assigment operator implementing deep copy of the reference NRSMat<T> object
* @see NRSMat<T>::operator=, NRSMat<T>::copyonwrite()
******************************************************************************/
template <typename T>
NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &rhs) {
#ifdef DEBUG
if(!rhs.v) laerror("unallocated NRSMat<T> object in NRSMat<T> & NRSMat<T>::operator|=(const NRSMat<T> &)");
#endif
if(this == &rhs) return *this;
*this = rhs;
this->copyonwrite();
return *this;
}
/***************************************************************************//**
* assignment operator implementing shallow copy of reference NRSMat<T> object
* @see NRSMat<T>::operator|=, NRSMat<T>::copyonwrite()
******************************************************************************/
template <typename T>
NRSMat<T> & NRSMat<T>::operator=(const NRSMat<T> & rhs) {
if(this == &rhs) return *this;
if(count)
if(--(*count) == 0){
#ifdef CUDALA
if(location == cpu){
#endif
delete [] v;
#ifdef CUDALA
}else{ gpufree(v); }
#endif
delete count;
}
v = rhs.v;
nn = rhs.nn;
count = rhs.count;
#ifdef CUDALA
location = rhs.location;
#endif
if(count) (*count)++;
return *this;
}
/***************************************************************************//**
* detach this NRSMat<T> object and create own physical copy of the data
* @see NRSMat<T>::operator|=, NRSMat<T>::copyonwrite()
******************************************************************************/
template <typename T>
void NRSMat<T>::copyonwrite(bool detachonly, bool deep) {
if(!count)
{
if(nn) laerror("nonempty smat without reference count encountered");
if(_LA_warn_empty_copyonwrite) std::cout <<"Warning: copyonwrite of empty smat\n";
return;
}
if(*count == 1 && !LA_traits<T>::is_plaindata() && !detachonly && deep) //type-nested copyonwrite
{
#ifdef CUDALA
if(location != cpu) laerror("nested types not supported on gpu memory");
#endif
for(int i=0; i<NN2; ++i) LA_traits<T>::copyonwrite(v[i]);
}
if(*count > 1){
(*count)--;
count = new int;
*count = 1;
T *newv;
#ifdef CUDALA
if(location == cpu) {
#endif
newv = new T[NN2];
if(!detachonly)
{
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, NN2*sizeof(T));
else
{
for(int i=0; i<NN2; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
}
}
#ifdef CUDALA
}else{
f(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
newv = (T *) gpualloc(NN2*sizeof(T));
if(sizeof(T)%sizeof(float) != 0) laerror("memory alignment problem in NRSMat<T>::copyonwrite()");
if(!detachonly) cublasScopy(NN2*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
TEST_CUBLAS("cublasScopy");//"NRSMat<T>::copyonwrite()"
}
#endif
v = newv;
}
}
/***************************************************************************//**
* resize this symmetric matrix of general type <code>T</code>
* @param[in] n requested number of rows (columns)
******************************************************************************/
template <typename T>
void NRSMat<T>::resize(const int n) {
#ifdef DEBUG
if(n < 0) laerror("illegal dimension in NRSMat<T>::resize(const int)");
#endif
if(count){
if(n == 0){
if(--(*count) <= 0) {
if(v) {
#ifdef CUDALA
if(location == cpu){
#endif
delete[] (v);
#ifdef CUDALA
}else{ gpufree(v); }
#endif
}
delete count;
}
count = 0;
nn = 0;
v = 0;
return;
}
if(*count > 1){
(*count)--;
count = 0;
v = 0;
nn = 0;
}
}
if(!count){
count = new int;
*count = 1;
nn = n;
#ifdef CUDALA
if(location == cpu){
#endif
v = new T[NN2];
#ifdef CUDALA
}else{ v = (T*) gpualloc(NN2*sizeof(T)); }
#endif
return;
}
if(n != nn){
nn = n;
#ifdef CUDALA
if(location == cpu){
#endif
delete[] v;
v = new T[NN2];
#ifdef CUDALA
}else{
gpufree(v);
v = (T*) gpualloc(NN2*sizeof(T));
}
#endif
}
}
/***************************************************************************//**
* perform memory transfers between CPU and GPU memory
* @param[in] dest memory destination
* @see NRSMat<T>::location, NRSMat<T>::getlocation()
******************************************************************************/
#ifdef CUDALA
template<typename T>
void NRSMat<T>::moveto(const GPUID dest) {
if(location == dest) return;
CPU_GPU(location,dest);
location = dest;
if(v && !count) laerror("internal inconsistency of reference counting 1");
if(!count) return;
if(v && *count == 0) laerror("internal inconsistency of reference counting 2");
if(!v) return;
T *vold = v;
if(dest == cpu){ //moving from GPU to CPU
v = new T[NN2];
gpuget(NN2, sizeof(T), vold, v);
if(*count == 1) gpufree(vold);
else {--(*count); count = new int(1);}
}else{ //moving from CPU to GPU
v = (T *) gpualloc(NN2*sizeof(T));
gpuput(NN2, sizeof(T), vold, v);
if(*count == 1) delete[] vold;
else {--(*count); count = new int(1);}
}
}
#endif
/***************************************************************************//**
* complexify a given matrix \f$A\f$ of general type <code>T</code>
* @param[in] rhs matrix \f$A\f$ intended for this operation
* @return matrix \f$B\f$ where \f$\Re B=A\f$ and \f$\Im B = 0\f$
******************************************************************************/
template<typename T>
NRSMat<std::complex<T> > complexify(const NRSMat<T> &rhs) {
NOT_GPU(rhs);
NRSMat<std::complex<T> > r(rhs.nrows());
for(register int i = 0; i<rhs.nrows(); ++i)
for(register int j = 0; j<=i; ++j) r(i,j) = rhs(i,j);
return r;
}
/***************************************************************************//**
* complexify a given double-precision real matrix \f$A\f$
* @param[in] rhs matrix \f$A\f$ intended for this operation
* @return matrix \f$B\f$ where \f$\Re B=A\f$ and \f$\Im B = 0\f$
******************************************************************************/
/*
template<>
NRSMat<std::complex<double> > complexify(const NRSMat<double> &rhs) {
NRSMat<std::complex<double> > r(rhs.nrows(), rhs.getlocation());
#ifdef CUDALA
if(rhs.getlocation() == cpu){
#endif
cblas_dcopy(rhs.size(), &(rhs[0]), 1, (double*)(&(r[0])), 2);
#ifdef CUDALA
}else{
cublasDcopy(rhs.size(), &(rhs[0]), 1, (double*)(&(r[0])), 2);
TEST_CUBLAS("cublasDcopy");//"NRSMat<std::complex<double> > complexify(const NRSMat<double> &)"
}
#endif
return r;
}
*/
/***************************************************************************//**
* output operator
* @param[in,out] s output stream
* @param[in] x NRSMat<T> matrix to be printed out
* @return modified stream
******************************************************************************/
template <typename T>
std::ostream& operator<<(std::ostream &s, const NRSMat<T> &x) {
#ifdef CUDALA
if(x.getlocation() == cpu){
#endif
int i,j,n;
n = x.nrows();
s << n << ' ' << n << '\n';
for(i = 0;i<n;i++){
for(j = 0; j<n;j++) s << (typename LA_traits_io<T>::IOtype)x(i,j) << (j == n-1 ? '\n' : ' ');
}
return s;
#ifdef CUDALA
}else{
NRSMat<T> tmp = x;
tmp.moveto(cpu);
return s<<tmp;
}
#endif
}
/***************************************************************************//**
* input operator
* @param[in,out] s input stream
* @param[in] x NRSMat<T> matrix for storing the input
* @return modified stream
******************************************************************************/
template <typename T>
std::istream& operator>>(std::istream &s, NRSMat<T> &x) {
#ifdef CUDALA
if(x.getlocation() == cpu){
#endif
int i,j,n,m;
s >> n >> m;
if(n!=m) laerror("input symmetric matrix not square");
x.resize(n);
typename LA_traits_io<T>::IOtype tmp;
for(i = 0;i<n;i++) for(j = 0; j<m;j++) {s>>tmp; x(i,j)=tmp;}
return s;
#ifdef CUDALA
}else{
NRSMat<T> tmp;
tmp.moveto(cpu);
s >> tmp;
tmp.moveto(x.getlocation());
x = tmp;
return s;
}
#endif
}
//convert whole matrix between types
template <typename T, typename S>
void NRSMat_convert(NRSMat<T> &a, const NRSMat<S> &b)
{
a.resize(b.nrows(),b.ncols());
for(int i=0; i<b.nrows(); ++i)
for(int j=0; j<=i; ++j)
a(i,j) = (T) b(i,j);
}
/***************************************************************************//**
* generate operators relating NRSMat<T> objects and scalars
* corresponding macro is defined in vec.h
******************************************************************************/
NRVECMAT_OPER(SMat,+)
NRVECMAT_OPER(SMat,-)
NRVECMAT_OPER(SMat,*)
/***************************************************************************//**
* generate operators relating in general two NRSMat<T> objects
* corresponding macro is defined in vec.h
******************************************************************************/
NRVECMAT_OPER2(SMat,+)
NRVECMAT_OPER2(SMat,-)
/***************************************************************************//**
* class implementing NRSMat<T> funcitonality with indices running from 1
* allmost all function members are inherited, only constructors are given explicitly
******************************************************************************/
template<typename T>
class NRSMat_from1 : public NRSMat<T> {
public:
NRSMat_from1(): NRSMat<T>() {};
explicit NRSMat_from1(const int n): NRSMat<T>(n) {};
NRSMat_from1(const NRSMat<T> &rhs): NRSMat<T>(rhs) {}; //be able to convert the parent class transparently to this
NRSMat_from1(const T &a, const int n): NRSMat<T>(a,n) {};
NRSMat_from1(const T *a, const int n): NRSMat<T>(a,n) {};
explicit NRSMat_from1(const NRMat<T> &rhs): NRSMat<T>(rhs) {};
explicit NRSMat_from1(const NRVec<T> &rhs, const int n): NRSMat<T>(rhs,n) {};
inline const T& operator() (const int i, const int j) const {
#ifdef DEBUG
if(i<=0||j<=0||i>NRSMat<T>::nn||j>NRSMat<T>::nn) laerror("index in const T& NRSMat<T>::operator() (const int, const int) out of range");
#endif
return NRSMat<T>::v[SMat_index_1(i,j)];
}
inline T& operator() (const int i, const int j){
#ifdef DEBUG
if(i<=0||j<=0||i>NRSMat<T>::nn||j>NRSMat<T>::nn) laerror("index in T& NRSMat<T>::operator() (const int, const int) out of range");
#endif
return NRSMat<T>::v[SMat_index_1(i,j)];
}
};
}//namespace
#endif /* _LA_SMAT_H_ */