1582 lines
50 KiB
C++
1582 lines
50 KiB
C++
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
|
|
/*******************************************************************************
|
|
LA: linear algebra C++ interface library
|
|
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
|
|
complex versions written by Roman Curik <roman.curik@jh-inst.cas.cz>
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*******************************************************************************/
|
|
#ifndef _LA_MAT_H_
|
|
#define _LA_MAT_H_
|
|
#include "la_traits.h"
|
|
|
|
using namespace LA_Vecmat3;
|
|
|
|
#include "vecmat3.h"
|
|
|
|
|
|
namespace LA {
|
|
|
|
//forward declaration
|
|
template<typename T> class NRPerm;
|
|
template<typename T> class CyclePerm;
|
|
template<typename T> class NRMat_from1;
|
|
|
|
|
|
/***************************************************************************//**
|
|
* \brief NRMat<T> class template implementing the matrix interface
|
|
* @see NRVec<T>, NRSMat<T>
|
|
******************************************************************************/
|
|
template <typename T>
|
|
class NRMat{
|
|
protected:
|
|
int nn;//!< number of rows
|
|
int mm;//!< number of columns
|
|
#ifdef MATPTR
|
|
T **v;//!< pointer to the array of pointers pointing to the beginings of individual rows
|
|
#else
|
|
T *v;//!< pointer to the data stored continuously in emmory
|
|
#endif
|
|
int *count;//!< reference counter
|
|
public:
|
|
#ifdef CUDALA
|
|
GPUID location;
|
|
#endif
|
|
friend class NRVec<T>;
|
|
friend class NRSMat<T>;
|
|
friend class NRMat_from1<T>;
|
|
friend class NRSMat_from1<T>;
|
|
|
|
//! standard destructor
|
|
~NRMat();
|
|
|
|
/***************************************************************************//**
|
|
* \brief inlined constructor creating zero matrix of general type <code>T</code>
|
|
******************************************************************************/
|
|
inline NRMat() : nn(0), mm(0), v(0), count(0){
|
|
#ifdef CUDALA
|
|
location = DEFAULT_LOC;
|
|
#endif
|
|
};
|
|
|
|
/***************************************************************************//**
|
|
* \brief Inlined constructor creating matrix of given size and location.
|
|
* Because of performance reasons, no incialization is done.
|
|
* @param[in] n vector size (count of elements)
|
|
* @param[in] loc location of the underlying data (CPU/GPU)
|
|
******************************************************************************/
|
|
inline NRMat(const int n, const int m, const GPUID loc = undefined);
|
|
|
|
//! inlined constructor creating matrix of given size filled with prescribed value and stored at given location
|
|
inline NRMat(const T &a, const int n, const int m, const GPUID loc);
|
|
|
|
//! inlined constructor creating matrix of given size filled with prescribed value
|
|
inline NRMat(const T &a, const int n, const int m);
|
|
|
|
//! inlined constructor creating matrix of given size filled with data located at given memory location
|
|
inline NRMat(const T *a, const int n, const int m);
|
|
inline NRMat(const Mat3<T> &rhs) : NRMat(&rhs(0,0),3,3) {};
|
|
|
|
//! inlined constructor creating matrix of given size from an array
|
|
template<int R, int C> inline NRMat(const T (&a)[R][C]);
|
|
|
|
//! inlined copy-constructor
|
|
inline NRMat(const NRMat &rhs);
|
|
|
|
//! complexifying constructor
|
|
NRMat(const typename LA_traits_complex<T>::NRMat_Noncomplex_type &rhs, bool imagpart = false);
|
|
//! explicit decomplexifying constructor
|
|
explicit NRMat(const NRMat<std::complex<T> > &rhs, bool imagpart = false);
|
|
|
|
//! explicit constructor converting symmetric matrix stored in packed form into a <code>NRMat<T></code> object
|
|
explicit NRMat(const NRSMat<T> &rhs);
|
|
|
|
//! explicit constructor converting vector into a <code>NRMat<T></code> object
|
|
#ifdef MATPTR
|
|
explicit NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset = 0):NRMat(&rhs[0][0] + offset , n, m){
|
|
if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
|
|
};
|
|
#else
|
|
explicit NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset = 0);
|
|
#endif
|
|
|
|
#ifdef MATPTR
|
|
const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v[0],rhs.v[0],(size_t)nn*mm);} //memcmp for scalars else elementwise
|
|
#else
|
|
const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v,rhs.v,(size_t)nn*mm);} //memcmp for scalars else elementwise
|
|
#endif
|
|
|
|
const bool operator==(const NRMat &rhs) const {return !(*this != rhs);};
|
|
|
|
//! determine the count of references to this object
|
|
inline int getcount() const {return count?*count:0;}
|
|
|
|
//! ensure that the data of this matrix are referenced exactly once
|
|
void copyonwrite(bool detachonly=false);
|
|
|
|
//! permute matrix elements
|
|
const NRMat permuted_rows(const NRPerm<int> &p, const bool inverse=false) const;
|
|
const NRMat permuted_cols(const NRPerm<int> &p, const bool inverse=false) const;
|
|
const NRMat permuted_both(const NRPerm<int> &p, const NRPerm<int> &q, const bool inverse=false) const;
|
|
void permuteme_rows(const CyclePerm<int> &p); //in place
|
|
void permuteme_cols(const CyclePerm<int> &p); //in place
|
|
void scale_row(const int i, const T f); //in place
|
|
void scale_col(const int i, const T f); //in place
|
|
void axpy(const T alpha, const NRPerm<int> &p, const bool direction);
|
|
explicit NRMat(const NRPerm<int> &p, const bool direction, const bool parity=false); //permutation matrix
|
|
|
|
|
|
/***************************************************************************//**
|
|
* routines for CUDA related stuff
|
|
* \li <code>getlocation()</code> gets the protected data member location
|
|
* \li <code>moveto(const GPUID)</code> moves underlying data between CPU/GPU memory
|
|
******************************************************************************/
|
|
#ifdef CUDALA
|
|
inline GPUID getlocation() const {return location;}
|
|
void moveto(const GPUID dest);
|
|
#else
|
|
inline GPUID getlocation() const {return cpu;}
|
|
void moveto(const GPUID dest) {};
|
|
#endif
|
|
|
|
//! fill the matrix with pseudorandom numbers (uniform distribution)
|
|
void randomize(const typename LA_traits<T>::normtype &x);
|
|
|
|
//! assigment operator performing shallow copy
|
|
NRMat & operator=(const NRMat &rhs);
|
|
//! assigment operator performing deep copy
|
|
NRMat & operator|=(const NRMat &rhs);
|
|
|
|
//! assign scalar value to the diagonal elements
|
|
NRMat & operator=(const T &a);
|
|
|
|
//! unit matrix for general power
|
|
void identity() {*this = (T)1;}
|
|
|
|
//! inverse matrix
|
|
NRMat inverse();
|
|
|
|
//! add scalar value to the diagonal elements
|
|
NRMat & operator+=(const T &a);
|
|
//! subtract scalar value to the diagonal elements
|
|
NRMat & operator-=(const T &a);
|
|
|
|
//! multiply by a scalar value
|
|
NRMat & operator*=(const T &a);
|
|
|
|
//! add given matrix
|
|
NRMat & operator+=(const NRMat &rhs);
|
|
//! subtract given matrix
|
|
NRMat & operator-=(const NRMat &rhs);
|
|
//! Hadamard element-wise product
|
|
NRMat & operator^=(const NRMat<T> &rhs);
|
|
|
|
//! add symmetric matrix stored in packed form
|
|
NRMat & operator+=(const NRSMat<T> &rhs);
|
|
//! subtract symmetric matrix stored in packed form
|
|
NRMat & operator-=(const NRSMat<T> &rhs);
|
|
|
|
//! unary minus
|
|
const NRMat operator-() const;
|
|
|
|
//! add scalar value to all matrix elements and return the result by value
|
|
inline const NRMat operator+(const T &a) const;
|
|
//! subtract scalar value from all matrix elements and return the result by value
|
|
inline const NRMat operator-(const T &a) const;
|
|
//! multiply all matrix elements by a scalar value and return the result by value
|
|
inline const NRMat operator*(const T &a) const;
|
|
|
|
//! add given matrix and return the result by value
|
|
inline const NRMat operator+(const NRMat &rhs) const;
|
|
//! add given symmetric matrix stored in packed form and return the result by value
|
|
inline const NRMat operator+(const NRSMat<T> &rhs) const;
|
|
|
|
//! subtract given matrix and return the result by value
|
|
inline const NRMat operator-(const NRMat &rhs) const;
|
|
//! subtract given symmetric matrix stored in packed form and return the result by value
|
|
inline const NRMat operator-(const NRSMat<T> &rhs) const;
|
|
|
|
//! multiply by given matrix and return the result by value
|
|
const NRMat operator*(const NRMat &rhs) const;
|
|
//! multiply by given symmetric matrix stored in packed form and return the result by value
|
|
const NRMat operator*(const NRSMat<T> &rhs) const;
|
|
|
|
//! direct sum of two matrices
|
|
const NRMat operator&(const NRMat &rhs) const;
|
|
//! direct product of two matrices
|
|
const NRMat operator|(const NRMat<T> &rhs) const;
|
|
|
|
//! multiply by a vector
|
|
const NRVec<T> operator*(const NRVec<T> &rhs) const {
|
|
NRVec<T> result(nn, rhs.getlocation());
|
|
result.gemv((T)0, *this, 'n', (T)1, rhs);
|
|
return result;
|
|
};
|
|
//! multiply this matrix of general type <code>T</code> by vector of type <code>complex<T></code>
|
|
const NRVec<std::complex<T> > operator*(const NRVec<std::complex<T> > &rhs) const {
|
|
NRVec<std::complex<T> > result(nn, rhs.getlocation());
|
|
result.gemv((T)0, *this, 'n', (T)1, rhs);
|
|
return result;
|
|
};
|
|
|
|
//! inner product of two matrices (taking conjugation into account in the complex case)
|
|
const T dot(const NRMat &rhs) const;
|
|
|
|
//! direct sum
|
|
const NRMat oplus(const NRMat &rhs) const;
|
|
//! direct product
|
|
const NRMat otimes(const NRMat &rhs, bool reversecolumns = false) const;
|
|
|
|
//! multiply by diagonal matrix from left
|
|
void diagmultl(const NRVec<T> &rhs);
|
|
//! multiply by diagonal matrix from right
|
|
void diagmultr(const NRVec<T> &rhs);
|
|
|
|
//! for this matrix \f$A\f$ compute \f$A^\mathrm{T}\cdot{}A\f$
|
|
const NRSMat<T> transposedtimes() const;
|
|
//! for this matrix \f$A\f$ compute \f$A\cdot{}A^\mathrm{T}\f$
|
|
const NRSMat<T> timestransposed() const;
|
|
|
|
//! sum the rows
|
|
const NRVec<T> rsum() const;
|
|
//! sum the columns
|
|
const NRVec<T> csum() const;
|
|
|
|
//! orthonormalize this matrix
|
|
void orthonormalize(const bool rowcol, const NRSMat<T> *metric = NULL);
|
|
|
|
//! get the i<sup>th</sup> row
|
|
const NRVec<T> row(const int i, int l = -1) const;
|
|
|
|
//! get the j<sup>th</sup> column
|
|
const NRVec<T> column(const int j, int l = -1) const {
|
|
NOT_GPU(*this);
|
|
if(l < 0) l = nn;
|
|
NRVec<T> r(l);
|
|
for(register int i=0; i<l; ++i) r[i] = (*this)(i,j);
|
|
return r;
|
|
};
|
|
|
|
//! extract the digonal elements of this matrix and store them into a vector
|
|
const T* diagonalof(NRVec<T> &, const bool divide = 0, bool cache = false) const;
|
|
//! set diagonal elements
|
|
void diagonalset(const NRVec<T> &);
|
|
|
|
//! perform the <b>gemv</b> operation with vector of type <code>T</code>
|
|
void gemv(const T beta, NRVec<T> &r, const char trans, const T alpha, const NRVec<T> &x) const { r.gemv(beta, *this, trans, alpha, x); };
|
|
//! perform the <b>gemv</b> operation with vector of type <code>complex<T></code>
|
|
void gemv(const T beta, NRVec<std::complex<T> > &r, const char trans, const T alpha, const NRVec<std::complex<T> > &x) const { r.gemv(beta, *this, trans, alpha, x); };
|
|
|
|
//! determine the pointer to the i<sup>th</sup> row
|
|
inline T* operator[](const int i);
|
|
//! determine the const pointer to the i<sup>th</sup> row
|
|
inline const T* operator[](const int i) const;
|
|
|
|
//! get the reference to the element with indices (i,j)
|
|
inline T& operator()(const int i, const int j);
|
|
//! get the const reference to the element with indices (i,j)
|
|
inline const T& operator()(const int i, const int j) const;
|
|
//! get the copy of the element with indices (i,j)
|
|
inline const T get_ij(const int i, const int j) const;
|
|
|
|
//! get the number of rows
|
|
inline int nrows() const;
|
|
//! get the number of columns
|
|
inline int ncols() const;
|
|
//! get the number of matrix elements
|
|
inline size_t size() const;
|
|
|
|
//! unformatted input
|
|
void get(int fd, bool dimensions = 1, bool transposed = false);
|
|
//! unformatted output
|
|
void put(int fd, bool dimensions = 1, bool transposed = false) const;
|
|
//! formatted output
|
|
void fprintf(FILE *f, const char *format, const int modulo) const;
|
|
//! formatted input
|
|
void fscanf(FILE *f, const char *format);
|
|
|
|
//! set all matrix elements equal to zero
|
|
void clear(){
|
|
if(nn&&mm){
|
|
copyonwrite(LA_traits<T>::is_plaindata());
|
|
LA_traits<T>::clear((*this)[0], (size_t)nn*mm);
|
|
}
|
|
};
|
|
|
|
//! resize the matrix
|
|
void resize(int n, int m);
|
|
|
|
//! deallocate the matrix
|
|
void dealloc(void) {resize(0,0);}
|
|
|
|
//! get the pointer to the data
|
|
inline operator T*();
|
|
|
|
//! get the const pointer to the data
|
|
inline operator const T*() const;
|
|
|
|
//! in case of square matrix, transpose the leading minor of order <code>n</code>
|
|
NRMat& transposeme(const int n = 0);
|
|
//! conjugate a square matrix
|
|
NRMat& conjugateme();
|
|
|
|
//! transpose this matrix and return the result by value
|
|
const NRMat transpose(bool conj = false) const;
|
|
//! conjugate this matrix and return the result by value
|
|
const NRMat conjugate() const {NRMat r(*this); r.conjugateme(); return r;};
|
|
|
|
//! extract specified submatrix
|
|
const NRMat submatrix(const int fromrow, const int torow, const int fromcol, const int tocol) const;
|
|
const NRMat submatrix(const NRVec<int> &rows, const NRVec<int> &cols) const;
|
|
|
|
//! store given matrix at given position into the current matrix
|
|
void storesubmatrix(const int fromrow, const int fromcol, const NRMat &rhs);
|
|
void storesubmatrix(const NRVec<int> &rows, const NRVec<int> &cols, const NRMat &rhs);
|
|
|
|
//! perform the \b gemm operation
|
|
void gemm(const T &beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T &alpha);
|
|
|
|
//! compute the norm of this matrix
|
|
const typename LA_traits<T>::normtype norm(const T scalar = (T)0) const;
|
|
|
|
//! add up a scalar multiple of given matrix to the current matrix
|
|
void axpy(const T alpha, const NRMat &x);
|
|
|
|
//! maximal element in the absolute value
|
|
inline const T amax() const;
|
|
//! minimal element in the absolute value
|
|
inline const T amin() const;
|
|
|
|
//! determine the sum of the diagonal elements
|
|
const T trace() const;
|
|
|
|
//! swap the order of the rows of the current matrix
|
|
NRMat & swap_rows();
|
|
//! swap the order of the columns of the current matrix
|
|
NRMat & swap_cols();
|
|
//! swap the order of the rows and columns of the current matrix
|
|
NRMat & swap_rows_cols();
|
|
|
|
// LV - swapping of columns i and j
|
|
NRMat & swap_cols(const int i, const int j);
|
|
// LV - swapping of rows i and j
|
|
NRMat & swap_rows(const int i, const int j);
|
|
|
|
//rotate rows or columns through an angle
|
|
NRMat & rotate_cols(const int i, const int j, const T phi);
|
|
NRMat & rotate_rows(const int i, const int j, const T phi);
|
|
|
|
|
|
//! multiply by sparse matrix
|
|
SparseSMat<T> operator*(const SparseSMat<T> &rhs) const;
|
|
|
|
//! explicit constructor converting sparse matrix into \c NRMat<T> object
|
|
explicit NRMat(const SparseMat<T> &rhs); // dense from sparse
|
|
//! explicit constructor converting sparse symmetric matrix into \c NRMat<T> object
|
|
explicit NRMat(const SparseSMat<T> &rhs);
|
|
//! explicit constructor converting sparse CSR matrix into \c NRMat<T> object
|
|
explicit NRMat(const CSRMat<T> &rhs);
|
|
|
|
//! add up given sparse matrix
|
|
NRMat & operator+=(const SparseMat<T> &rhs);
|
|
//! subtract given sparse matrix
|
|
NRMat & operator-=(const SparseMat<T> &rhs);
|
|
|
|
//! perform the \b gemm operation
|
|
void gemm(const T &beta, const SparseMat<T> &a, const char transa, const NRMat &b, const char transb, const T &alpha);
|
|
|
|
inline void simplify() {};
|
|
bool issymmetric() const { return 0; };
|
|
|
|
const typename LA_traits<T>::normtype nonsymmetry() const;
|
|
const typename LA_traits<T>::normtype nonhermiticity() const;
|
|
|
|
#ifndef NO_STRASSEN
|
|
//! Strassen's multiplication (better than \f$\mathacal{O}(n^3)\f$, analogous syntax to \see NRMat<T>::gemm() )
|
|
void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha);
|
|
void s_cutoff(const int,const int,const int,const int) const;
|
|
#endif
|
|
void swap(NRMat &rhs) //more efficient swap than via tmp and constructors and operator=
|
|
{
|
|
int tmpnn=nn; nn=rhs.nn; rhs.nn=tmpnn;
|
|
int tmpmm=mm; mm=rhs.mm; rhs.mm=tmpmm;
|
|
#ifdef MATPTR
|
|
T **
|
|
#else
|
|
T *
|
|
#endif
|
|
tmpv=v; v=rhs.v; rhs.v=tmpv;
|
|
int *tmpcount=count; count=rhs.count; rhs.count=tmpcount;
|
|
#ifdef CUDALA
|
|
GPUID tmplocation=location; location=rhs.location; rhs.location=tmplocation;
|
|
#endif
|
|
|
|
}
|
|
NRMat<typename LA_traits<T>::normtype> abs() const; //elementwise absolute values
|
|
NRMat<typename LA_traits<T>::normtype> diffabs(const NRMat &rhs) const; //difference of absolute values
|
|
};
|
|
|
|
}//namespace
|
|
|
|
//due to mutual includes this has to be after full class declaration
|
|
#include "vec.h"
|
|
#include "smat.h"
|
|
#include "sparsemat.h"
|
|
#include "sparsesmat.h"
|
|
#include "permutation.h"
|
|
|
|
namespace LA {
|
|
|
|
/***************************************************************************//**
|
|
* matrix constructor
|
|
* @param[in] n number of rows of the matrix being created
|
|
* @param[in] m number of cols of the matrix being created
|
|
* @param[in] loc location for storing the matrix
|
|
* @see count, v, location
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T>::NRMat(const int n, const int m, const GPUID loc) : nn(n), mm(m), count(new int) {
|
|
T* p;
|
|
*count = 1;
|
|
const size_t nm = (size_t)n*m;
|
|
#ifdef CUDALA
|
|
location = (loc==undefined?DEFAULT_LOC:loc);
|
|
if(location == cpu) {
|
|
#endif
|
|
#ifdef MATPTR
|
|
v = new T*[n];
|
|
p = v[0] = new T[nm];
|
|
for (int i=1; i<n; i++) v[i] = v[i-1] + m;
|
|
#else
|
|
p = v = new T[nm];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
const T val = 0;
|
|
v = (T*) gpualloc(nm*sizeof(T));
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* matrix constructor
|
|
* @param[in] a value of type T intended for matrix inicialization
|
|
* @param[in] n number of rows of the matrix being created
|
|
* @param[in] m number of cols of the matrix being created
|
|
* @see count, v
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T>::NRMat(const T &a, const int n, const int m, const GPUID loc) : nn(n), mm(m), count(new int) {
|
|
const size_t nm = (size_t)n*m;
|
|
T *p;
|
|
*count = 1;
|
|
|
|
#ifdef CUDALA
|
|
location = (loc==undefined?DEFAULT_LOC:loc);
|
|
if(location==cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
v = new T*[n];
|
|
p = v[0] = new T[nm];
|
|
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
|
|
#else
|
|
p = v = new T[nm];
|
|
#endif
|
|
if (!LA_traits<T>::is_plaindata() || a != (T)0){
|
|
for (register int i=0; i<nm; i++) *p++ = a;
|
|
}else{
|
|
memset(p, 0, nm*sizeof(T));
|
|
}
|
|
#ifdef CUDALA
|
|
}else{
|
|
if(sizeof(T)%sizeof(float) != 0)laerror("memory alignment error");
|
|
|
|
v = (T*)gpualloc(nm*sizeof(T));
|
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
|
smart_gpu_set(nm, a, v);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* inline constructor creating matrix from an array
|
|
******************************************************************************/
|
|
|
|
|
|
template <typename T> template<int R, int C>
|
|
inline NRMat<T>::NRMat(const T (&a)[R][C]) : count(new int) {
|
|
nn = R;
|
|
mm = C;
|
|
*count = 1;
|
|
|
|
|
|
#ifdef CUDALA
|
|
if(location==cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
v = new T*[nn];
|
|
v[0] = new T[nn*mm];
|
|
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
|
|
if(LA_traits<T>::is_plaindata()) memcpy(v[0], a, nn*mm*sizeof(T));
|
|
else for( int i=0; i<nn; i++) for(int j=0; j<mm; ++j) v[i][j]=a[i][j];
|
|
#else
|
|
v = new T[nn*mm];
|
|
if(LA_traits<T>::is_plaindata()) memcpy(v, a, nn*mm*sizeof(T));
|
|
else for( int i=0; i<nn; i++) for(int j=0; j<mm; ++j) v[i*mm+j] = a[i][j];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
|
v = (T*) gpualloc(nn*mm*sizeof(T));
|
|
cublasSetVector(nm, sizeof(T), a, 1, v, 1);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* matrix constructor
|
|
* @param[in] a value of type T intended for matrix inicialization
|
|
* @param[in] n number of rows of the matrix being created
|
|
* @param[in] m number of cols of the matrix being created
|
|
* @see count, v
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new int) {
|
|
const size_t nm = (size_t)n*m;
|
|
T *p;
|
|
*count = 1;
|
|
|
|
#ifdef CUDALA
|
|
location = DEFAULT_LOC;
|
|
if(location==cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
v = new T*[n];
|
|
p = v[0] = new T[nm];
|
|
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
|
|
#else
|
|
p = v = new T[nm];
|
|
#endif
|
|
if (!LA_traits<T>::is_plaindata() || a != (T)0)
|
|
for (register int i=0; i<nm; i++) *p++ = a;
|
|
else
|
|
memset(p, 0, nm*sizeof(T));
|
|
#ifdef CUDALA
|
|
}else{
|
|
v = (T*)gpualloc(nm*sizeof(T));
|
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
|
smart_gpu_set(nm, a, v);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* matrix constructor
|
|
* @param[in] a pointer to values of type T intended for matrix inicialization
|
|
* @param[in] n number of rows of the matrix being created
|
|
* @param[in] m number of cols of the matrix being created
|
|
* @see count, v
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T>::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new int) {
|
|
const size_t nm = (size_t)n*m;
|
|
#ifdef CUDALA
|
|
location = DEFAULT_LOC;
|
|
#endif
|
|
|
|
*count = 1;
|
|
#ifdef CUDALA
|
|
if(location==cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
v = new T*[n];
|
|
v[0] = new T[nm];
|
|
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
|
|
if(LA_traits<T>::is_plaindata()) memcpy(v[0], a, nm*sizeof(T));
|
|
else for(int i=0; i<nm; ++i) v[0][i] = a[i];
|
|
#else
|
|
v = new T[nm];
|
|
if(LA_traits<T>::is_plaindata()) memcpy(v, a, nm*sizeof(T));
|
|
else for(int i=0; i<nm; ++i) v[i] = a[i];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
v = (T*) gpualloc(nm*sizeof(T));
|
|
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
|
|
cublasSetVector(nm, sizeof(T), a, 1, v, 1);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* copy constructor implementing shallow copy
|
|
* @param[in] rhs reference object to be copied
|
|
* @see count, v
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T>::NRMat(const NRMat &rhs) {
|
|
#ifdef CUDALA
|
|
location = rhs.location;
|
|
#endif
|
|
nn = rhs.nn;
|
|
mm = rhs.mm;
|
|
count = rhs.count;
|
|
v = rhs.v;
|
|
if (count) ++(*count);
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* create matrix from a \c NRSMat object
|
|
* @param[in] rhs \c NRSMat input object to be converted
|
|
* @see count, v, vec.h, NRSMat<T>
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T>::NRMat(const NRSMat<T> &rhs) {
|
|
NOT_GPU(rhs);
|
|
|
|
#ifdef CUDALA
|
|
location = rhs.location;
|
|
#endif
|
|
|
|
int i(0), j(0), k(0);
|
|
nn = mm = rhs.nrows();
|
|
count = new int;
|
|
*count = 1;
|
|
#ifdef MATPTR
|
|
v = new T*[nn];
|
|
v[0] = new T[(size_t)mm*nn];
|
|
for (int i=1; i<nn; i++) v[i] = v[i-1] + mm;
|
|
#else
|
|
v = new T[(size_t)mm*nn];
|
|
#endif
|
|
|
|
#ifdef MATPTR
|
|
for (i=0; i<nn; i++){
|
|
for (j=0; j<=i; j++){
|
|
v[i][j] = v[j][i] = rhs[k++];
|
|
}
|
|
}
|
|
#else
|
|
for (i=0; i<nn; i++){
|
|
for (j=0; j<=i; j++){
|
|
v[i*(size_t)nn + j] = v[j*(size_t)nn + i] = rhs[k++];
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* create matrix from a vector (shallow copy)
|
|
* @param[in] rhs NRVec vector containing the data
|
|
* @param[in] n number of rows of the matrix being created
|
|
* @param[in] m number of cols of the matrix being created
|
|
* @see count, v, vec.h
|
|
******************************************************************************/
|
|
#ifndef MATPTR
|
|
template <typename T>
|
|
NRMat<T>::NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset)
|
|
{
|
|
if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
|
|
if(offset!=0) std::cout << "dangerous: if the underlying vector is deallocated before the matrix, wrong delete[] will result for nonzero offset!!!\n";
|
|
#ifdef CUDALA
|
|
location=rhs.location;
|
|
#endif
|
|
|
|
nn = n;
|
|
mm = m;
|
|
count = rhs.count;
|
|
v = rhs.v + offset;//!< make just shallow copy
|
|
(*count)++;//!< therefore increase the reference counter
|
|
}
|
|
#endif
|
|
|
|
/***************************************************************************//**
|
|
* \c NRMat + \c NRSmat via operator +=
|
|
* @param[in] rhs NRSMat matrix to be subtracted from current matrix
|
|
* @return result of the subtraction
|
|
* @see NRMat<T>::operator+=(const NRSMat<T> &)
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline const NRMat<T> NRMat<T>::operator+(const NRSMat<T> &rhs) const {
|
|
return NRMat<T>(*this) += rhs;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* \c NRMat - \c NRSmat via operator -=
|
|
* @param[in] rhs NRSMat matrix to be subtracted from current matrix
|
|
* @return result of the subtraction
|
|
* @see NRMat<T>::operator-=(const NRSMat<T> &)
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline const NRMat<T> NRMat<T>::operator-(const NRSMat<T> &rhs) const {
|
|
return NRMat<T>(*this) -= rhs;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @param[in] i row number
|
|
* @return pointer to the first element in the i-th row
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline T* NRMat<T>::operator[](const int i) {
|
|
#ifdef DEBUG
|
|
if (_LA_count_check && *count != 1) laerror("matrix with *count>1 used as l-value");
|
|
if (i < 0 || i >= nn) laerror("Mat [] out of range");
|
|
if (!v) laerror("unallocated matrix");
|
|
#endif
|
|
#ifdef MATPTR
|
|
return v[i];
|
|
#else
|
|
return v + i*(size_t)mm;
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @param[in] i row number
|
|
* @return const pointer to the first element in the i-th row
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline const T* NRMat<T>::operator[](const int i) const {
|
|
#ifdef DEBUG
|
|
if (i < 0 || i >= nn) laerror("index out of range");
|
|
if (!v) laerror("unallocated matrix");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
#ifdef MATPTR
|
|
return v[i];
|
|
#else
|
|
return v + i*(size_t)mm;
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for a given matrix \f$A\f$, determine the element with indices (i,j)
|
|
* @param[in] i row number
|
|
* @param[in] j col number
|
|
* @return reference to \f$A_{i,j}\f$
|
|
* @see NRMat<T>::count
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline T& NRMat<T>::operator()(const int i, const int j){
|
|
#ifdef DEBUG
|
|
if (_LA_count_check && *count != 1) laerror("NRMat::operator(,) used as l-value for a matrix with count > 1");
|
|
if (i < 0 || i >= nn && nn > 0 || j < 0 || j >= mm && mm > 0) laerror("index out of range");
|
|
if (!v) laerror("unallocated matrix");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
#ifdef MATPTR
|
|
return v[i][j];
|
|
#else
|
|
return v[i*(size_t)mm + j];
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for a given matrix \f$A\f$, determine the element with indices (i,j)
|
|
* @param[in] i row number
|
|
* @param[in] j col number
|
|
* @return const reference to \f$A_{i,j}\f$
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline const T& NRMat<T>::operator()(const int i, const int j) const{
|
|
T ret;
|
|
#ifdef DEBUG
|
|
if (i<0 || i>=nn && nn>0 || j<0 || j>=mm && mm>0) laerror("index out of range");
|
|
if (!v) laerror("unallocated matrix");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
#ifdef MATPTR
|
|
return v[i][j];
|
|
#else
|
|
return v[i*(size_t)mm + j];
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for a given matrix \f$A\f$, determine the element with indices (i,j)
|
|
* @param[in] i row number
|
|
* @param[in] j col number
|
|
* @return const reference to \f$A_{i,j}\f$
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline const T NRMat<T>::get_ij(const int i, const int j) const{
|
|
T ret;
|
|
#ifdef DEBUG
|
|
if (i<0 || i>=nn || j<0 || j>=mm) laerror("index out of range");
|
|
if (!v) laerror("unallocated matrix");
|
|
#endif
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
return v[i][j];
|
|
#else
|
|
return v[i*(size_t)mm + j];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
const size_t pozice = i*(size_t)mm + j;
|
|
gpuget(1, sizeof(T), v + pozice, &ret);
|
|
return ret;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return number of rows
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline int NRMat<T>::nrows() const{
|
|
return nn;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return number of columns
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline int NRMat<T>::ncols() const{
|
|
return mm;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return number of elements
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline size_t NRMat<T>::size() const{
|
|
return (size_t)nn*mm;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return pointer of general type T to the underlying data structure
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRMat<T>::operator T*(){
|
|
#ifdef DEBUG
|
|
if (!v) laerror("unallocated matrix");
|
|
#endif
|
|
#ifdef MATPTR
|
|
return v[0];
|
|
#else
|
|
return v;
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* @return const pointer of general type T to the underlying data
|
|
******************************************************************************/
|
|
template <typename T>
|
|
inline NRMat<T>::operator const T*() const{
|
|
#ifdef DEBUG
|
|
if (!v) laerror("unallocated matrix");
|
|
#endif
|
|
#ifdef MATPTR
|
|
return v[0];
|
|
#else
|
|
return v;
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for this real matrix \f$A\f$, determine the first element
|
|
* with largest absolute value
|
|
* @return \f$A_{l,m}\f$ which maximizes \f$\left|A_{i,j}\right|\f$
|
|
******************************************************************************/
|
|
template<>
|
|
inline const double NRMat<double>::amax() const{
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
return v[0][cblas_idamax(nn*mm, v[0], 1) - 1];
|
|
#else
|
|
return v[cblas_idamax(nn*mm, v, 1) - 1];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
double ret(0.0);
|
|
const size_t pozice = cublasIdamax((size_t)nn*mm, v, 1) - 1;
|
|
TEST_CUBLAS("cublasIdamax");
|
|
gpuget(1, sizeof(double), v + pozice, &ret);
|
|
return ret;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for this real matrix \f$A\f$, determine the first element
|
|
* with smallest absolute value
|
|
* @return \f$A_{l,m}\f$ which minimizes \f$\left|A_{i,j}\right|\f$
|
|
******************************************************************************/
|
|
template<>
|
|
inline const double NRMat<double>::amin() const{
|
|
double ret(0.0);
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
// idamin seems not to be supported
|
|
const size_t nm = (size_t)nn*mm;
|
|
double val(0.0);
|
|
int index(-1);
|
|
ret = std::numeric_limits<double>::max();
|
|
for(register int i=0; i < nm; i++){
|
|
#ifdef MATPTR
|
|
val = std::abs(v[0][i]);
|
|
#else
|
|
val = std::abs(v[i]);
|
|
#endif
|
|
if(val < ret){ index = i; ret = val; }
|
|
}
|
|
#ifdef MATPTR
|
|
ret = v[0][index];
|
|
#else
|
|
ret = v[index];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
const size_t pozice = cublasIdamin((size_t)nn*mm, v, 1) - 1;
|
|
TEST_CUBLAS("cublasIdamin");
|
|
gpuget(1, sizeof(double), v + pozice, &ret);
|
|
}
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for this complex matrix \f$A\f$, determine the smallest index of the maximum
|
|
* magnitude element, i.e. maximal element in the 1-norm
|
|
* @return \f$A_{l,m}\f$ which maximizes \f$\left\{\left|\Re{}A_{i,j}\right|+\left|\Im{}A_{i,j}\right|\right}\f$
|
|
******************************************************************************/
|
|
template<>
|
|
inline const std::complex<double> NRMat<std::complex<double> >::amax() const{
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
return v[0][cblas_izamax(nn*mm, v[0], 1) - 1];
|
|
#else
|
|
return v[cblas_izamax(nn*mm, v, 1) - 1];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
std::complex<double> ret(0.0, 0.0);
|
|
const size_t pozice = cublasIzamax((size_t)nn*mm, (cuDoubleComplex*)v, 1) - 1;
|
|
TEST_CUBLAS("cublasIzamax");
|
|
gpuget(1, sizeof(std::complex<double>), v + pozice, &ret);
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* for this complex matrix \f$A\f$, determine the smallest index of the minimum
|
|
* magnitude element, i.e. minimal element in the 1-norm
|
|
* @return \f$A_{l,m}\f$ which minimizes \f$\left\{\left|\Re{}A_{i,j}\right|+\left|\Im{}A_{i,j}\right|\right}\f$
|
|
******************************************************************************/
|
|
template<>
|
|
inline const std::complex<double> NRMat<std::complex<double> >::amin() const{
|
|
std::complex<double> ret(0.0, 0.0);
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
// idamin seems not to be supported
|
|
const size_t nm = (size_t)nn*mm;
|
|
int index(-1);
|
|
double val(0.0), min_val(0.0);
|
|
std::complex<double> z_val(0.0, 0.0);
|
|
|
|
min_val = std::numeric_limits<double>::max();
|
|
for(register int i=0; i < nm; i++){
|
|
#ifdef MATPTR
|
|
z_val = v[0][i];
|
|
#else
|
|
z_val = v[i];
|
|
#endif
|
|
val = std::abs(z_val.real()) + std::abs(z_val.imag());
|
|
if(val < min_val){ index = i; min_val = val; }
|
|
}
|
|
#ifdef MATPTR
|
|
ret = v[0][index];
|
|
#else
|
|
ret = v[index];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
const size_t pozice = cublasIzamin((size_t)nn*mm, (cuDoubleComplex*)v, 1) - 1;
|
|
TEST_CUBLAS("cublasIzamin");
|
|
gpuget(1, sizeof(std::complex<double>), v + pozice, &ret);
|
|
}
|
|
#endif
|
|
return ret;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* destructor for general type
|
|
* @see NRMat<T>::count
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T>::~NRMat() {
|
|
if(!count) return;
|
|
if(--(*count) <= 0) {
|
|
if (v){
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
delete[] (v[0]);
|
|
#endif
|
|
delete[] v;
|
|
#ifdef CUDALA
|
|
}else{
|
|
gpufree(v);
|
|
}
|
|
#endif
|
|
}
|
|
delete count;
|
|
}
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* assigment operator for general type between NRMat and NRMat
|
|
* @see count
|
|
* @return reference to the newly assigned matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T> & NRMat<T>::operator=(const NRMat<T> &rhs) {
|
|
if (this != &rhs){
|
|
if (count){
|
|
if (--(*count) ==0 ){
|
|
#ifdef CUDALA
|
|
if(location == cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
delete[] (v[0]);
|
|
#endif
|
|
delete[] v;
|
|
#ifdef CUDALA
|
|
}else{ gpufree(v); }
|
|
#endif
|
|
delete count;
|
|
}
|
|
}
|
|
v = rhs.v;
|
|
#ifdef CUDALA
|
|
location = rhs.location;
|
|
#endif
|
|
nn = rhs.nn;
|
|
mm = rhs.mm;
|
|
count = rhs.count;
|
|
if(count) (*count)++;
|
|
}
|
|
return *this;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* perform an explicit deep copy of \c NRMat object
|
|
* @see count
|
|
* @return reference to the newly copied matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T> & NRMat<T>::operator|=(const NRMat<T> &rhs) {
|
|
if(this == &rhs) return *this; // test to avoid self-assignment
|
|
*this = rhs;
|
|
this->copyonwrite();
|
|
return *this;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* create own deep copy
|
|
* @see NRMat<T>::count, NRMat<T>::operator|=()
|
|
******************************************************************************/
|
|
template <typename T>
|
|
void NRMat<T>::copyonwrite(bool detachonly) {
|
|
if(!count)
|
|
{
|
|
if(nn||mm) laerror("nonempty matrix without reference count encountered");
|
|
if(_LA_warn_empty_copyonwrite) std::cout <<"Warning: copyonwrite of empty matrix\n";
|
|
return;
|
|
}
|
|
|
|
if(*count > 1){
|
|
(*count)--;
|
|
count = new int;
|
|
*count = 1;
|
|
#ifdef CUDALA
|
|
if(location == cpu){ //matrix is in CPU memory
|
|
#endif
|
|
#ifdef MATPTR
|
|
T **newv = new T*[nn];
|
|
newv[0] = new T[(size_t)mm*nn];
|
|
if(!detachonly)
|
|
{
|
|
if(LA_traits<T>::is_plaindata()) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
|
|
else
|
|
{
|
|
for(int i=0; i<nn*mm; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
|
|
}
|
|
}
|
|
v = newv;
|
|
for(register int i=1; i<nn; i++) v[i] = v[i-1] + mm;
|
|
#else
|
|
T *newv = new T[(size_t)mm*nn];
|
|
if(!detachonly)
|
|
{
|
|
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
|
|
else
|
|
{
|
|
for(int i=0; i<nn*mm; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
|
|
}
|
|
}
|
|
v = newv;
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{ //matrix is in GPU memory
|
|
if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
|
|
T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T));
|
|
if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
|
|
if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
|
|
TEST_CUBLAS("cublasScopy");
|
|
v = newv;
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* resize given matrix
|
|
* @param[in] n number of rows
|
|
* @param[in] m number of cols
|
|
* @see count, NRMat<T>::copyonwrite(), NRMat<T>::operator|=()
|
|
* @return reference to the newly copied matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
void NRMat<T>::resize(int n, int m) {
|
|
#ifdef DEBUG
|
|
if (n<0 || m<0) laerror("illegal dimensions");
|
|
#endif
|
|
|
|
//allow trivial dimensions
|
|
if(n == 0 || m == 0) m = n =0;
|
|
|
|
if(count){
|
|
if(n==0 && m==0){
|
|
if(--(*count) <= 0){
|
|
#ifdef CUDALA
|
|
if(location==cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
if(v) delete[] (v[0]);
|
|
#endif
|
|
if(v) delete[] v;
|
|
#ifdef CUDALA
|
|
}
|
|
else { gpufree(v); }
|
|
#endif
|
|
delete count;
|
|
}
|
|
count = 0;
|
|
nn = mm = 0;
|
|
v = 0;
|
|
return;
|
|
}
|
|
/*
|
|
if we have more than one reference to this matrix, set count to NULL
|
|
in order to reach the if-branch below where new memory resources are allocated
|
|
*/
|
|
if(*count > 1){
|
|
(*count)--;
|
|
count = 0;
|
|
nn = mm = 0;
|
|
v = 0;
|
|
}
|
|
}
|
|
|
|
if(!count){
|
|
count = new int;
|
|
*count = 1;
|
|
nn = n;
|
|
mm = m;
|
|
#ifdef CUDALA
|
|
if(location==cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
v = new T*[nn];
|
|
v[0] = new T[(size_t)m*n];
|
|
for (register int i=1; i< n; i++) v[i] = v[i-1] + m;
|
|
#else
|
|
v = new T[(size_t)m*n];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
v = (T *) gpualloc((size_t)n*m*sizeof(T));
|
|
}
|
|
#endif
|
|
return;
|
|
}
|
|
|
|
// at this point *count = 1, check if resize is necessary
|
|
if (n != nn || m != mm) {
|
|
nn = n;
|
|
mm = m;
|
|
#ifdef CUDALA
|
|
if(location==cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
delete[] (v[0]);
|
|
#endif
|
|
delete[] v;
|
|
#ifdef MATPTR
|
|
v = new T*[nn];
|
|
v[0] = new T[(size_t)m*n];
|
|
for (int i=1; i< n; i++) v[i] = v[i-1] + m;
|
|
#else
|
|
v = new T[(size_t)m*n];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
gpufree(v);
|
|
v=(T *) gpualloc((size_t)n*m*sizeof(T));
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* complexify a given matrix \f$A\f$
|
|
* @param[in] rhs matrix \f$A\f$ intended for this operation
|
|
* @return matrix \f$B\f$ where \f$\Re B=A\f$ and \f$\Im B = 0\f$
|
|
******************************************************************************/
|
|
template<typename T>
|
|
NRMat<std::complex<T> > complexify(const NRMat<T> &rhs) {
|
|
NOT_GPU(rhs);
|
|
|
|
NRMat<std::complex<T> > r(rhs.nrows(), rhs.ncols(), rhs.getlocation());
|
|
for(register int i=0; i<rhs.nrows(); ++i){
|
|
for(register int j=0; j<rhs.ncols(); ++j) r(i,j) = rhs(i,j);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
//this is general for any type, complexmatrix() uses blas for doubles
|
|
template<typename T>
|
|
NRMat<std::complex<T> > complexify(const NRMat<T> &rhsr, const NRMat<T> &rhsi) {
|
|
NOT_GPU(rhsr);
|
|
NOT_GPU(rhsi);
|
|
if(rhsr.nrows()!=rhsi.nrows() || rhsr.ncols()!=rhsi.ncols()) laerror("inconsistent dimensions in complexify");
|
|
NRMat<std::complex<T> > r(rhsr.nrows(), rhsr.ncols(), rhsr.getlocation());
|
|
for(register int i=0; i<rhsr.nrows(); ++i){
|
|
for(register int j=0; j<rhsr.ncols(); ++j) r(i,j) = std::complex<T>(rhsr(i,j),rhsi(i,j));
|
|
}
|
|
return r;
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* output operator
|
|
* @param[in,out] s output stream
|
|
* @param[in] x NRMat matrix to be prited out
|
|
* @return modified stream
|
|
******************************************************************************/
|
|
template <typename T>
|
|
std::ostream& operator<<(std::ostream &s, const NRMat<T> &x) {
|
|
#ifdef CUDALA
|
|
if(x.getlocation() == cpu){
|
|
#endif
|
|
int i(0),j(0);
|
|
int n(x.nrows()), m(x.ncols());
|
|
s << n << ' ' << m << '\n';
|
|
for(i=0; i<n; i++){
|
|
for(j=0; j<m; j++){
|
|
// endl cannot be used in the conditional expression, since it is an overloaded function
|
|
s << (typename LA_traits_io<T>::IOtype) x[i][j] << (j==m-1 ? '\n' : ' ');
|
|
}
|
|
}
|
|
return s;
|
|
#ifdef CUDALA
|
|
}else{
|
|
NRMat<T> tmp = x;
|
|
tmp.moveto(cpu);
|
|
return s << tmp;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* input operator
|
|
* @param[in,out] s input stream
|
|
* @param[in] x NRMat matrix for storing the input
|
|
* @return modified stream
|
|
******************************************************************************/
|
|
template <typename T>
|
|
std::istream& operator>>(std::istream &s, NRMat<T> &x)
|
|
{
|
|
#ifdef CUDALA
|
|
if(x.getlocation() == cpu){
|
|
#endif
|
|
int i(0), j(0), n(0), m(0);
|
|
s >> n >> m;
|
|
x.resize(n, m);
|
|
typename LA_traits_io<T>::IOtype tmp;
|
|
for(i=0;i<n;i++){
|
|
for(j=0; j<m;j++){
|
|
s >> tmp;
|
|
x[i][j] = tmp;
|
|
}
|
|
}
|
|
return s;
|
|
#ifdef CUDALA
|
|
}else{
|
|
NRMat<T> tmp;
|
|
tmp.moveto(cpu);
|
|
s >> tmp;
|
|
tmp.moveto(x.getlocation());
|
|
x = tmp;
|
|
return s;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/***************************************************************************//**
|
|
* implements \c NRMat<T> functionality with indexing from 1
|
|
* all possible constructors have to be given explicitly, other stuff is inherited
|
|
* with exception of the operator() which differs
|
|
******************************************************************************/
|
|
template<typename T>
|
|
class NRMat_from1 : public NRMat<T> {
|
|
public:
|
|
NRMat_from1(): NRMat<T>() {};
|
|
template<int R, int C> explicit NRMat_from1(const T (&a)[R][C]) : NRMat<T>(a) {};
|
|
explicit NRMat_from1(const int n): NRMat<T>(n) {};
|
|
explicit NRMat_from1(const NRSMat_from1<T> &rhs) : NRMat<T>(rhs) {};
|
|
NRMat_from1(const NRMat<T> &rhs): NRMat<T>(rhs) {};//!< be able to convert the parent class transparently to this
|
|
NRMat_from1(const int n, const int m): NRMat<T>(n, m) {};
|
|
NRMat_from1(const T &a, const int n, const int m): NRMat<T>(a, n, m) {};
|
|
NRMat_from1(const T *a, const int n, const int m): NRMat<T>(a, n, m) {};
|
|
|
|
inline const T& operator() (const int i, const int j) const {
|
|
#ifdef DEBUG
|
|
if (i<1 || i>NRMat<T>::nn || j<1 || j>NRMat<T>::mm) laerror("index out of range");
|
|
if (!NRMat<T>::v) laerror("unallocated matrix");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
#ifdef MATPTR
|
|
return NRMat<T>::v[i - 1][j - 1];
|
|
#else
|
|
return NRMat<T>::v[(i-1)*(size_t)NRMat<T>::mm+j-1];
|
|
#endif
|
|
}
|
|
|
|
inline T& operator() (const int i, const int j) {
|
|
#ifdef DEBUG
|
|
if (_LA_count_check && *NRMat<T>::count != 1) laerror("matrix with *count > 1 used as l-value");
|
|
if (i<1 || i>NRMat<T>::nn || j<1 || j>NRMat<T>::mm) laerror("index out of range");
|
|
if (!NRMat<T>::v) laerror("unallocated matrix");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
#ifdef MATPTR
|
|
return NRMat<T>::v[i-1][j-1];
|
|
#else
|
|
return NRMat<T>::v[(i-1)*NRMat<T>::mm+j-1];
|
|
#endif
|
|
}
|
|
|
|
inline const T get_ij(const int i, const int j) const {
|
|
T ret;
|
|
#ifdef DEBUG
|
|
if (i<1 || i>NRMat<T>::nn || j<1 || j>NRMat<T>::mm) laerror("index out of range");
|
|
if (!NRMat<T>::v) laerror("unallocated matrix");
|
|
#endif
|
|
#ifdef CUDALA
|
|
if(NRMat<T>::location == cpu){
|
|
#endif
|
|
#ifdef MATPTR
|
|
return NRMat<T>::v[i - 1][j - 1];
|
|
#else
|
|
return NRMat<T>::v[(size_t)(i-1)*NRMat<T>::mm + (j-1)];
|
|
#endif
|
|
#ifdef CUDALA
|
|
}else{
|
|
const size_t pozice = (size_t)(i-1)*NRMat<T>::mm + (j-1);
|
|
gpuget(1, sizeof(T), NRMat<T>::v + pozice, &ret);
|
|
return ret;
|
|
}
|
|
#endif
|
|
}
|
|
};
|
|
|
|
/***************************************************************************//**
|
|
* compute Hadamard (component-wise) product with a given matrix \f$A\f$
|
|
* @param[in] rhs matrix \f$A\f$
|
|
* @see count, operator*
|
|
* @return reference to the multiplied matrix
|
|
******************************************************************************/
|
|
template<typename T>
|
|
NRMat<T>& NRMat<T>::operator^=(const NRMat<T> &rhs){
|
|
#ifdef DEBUG
|
|
if (nn != rhs.nn || mm != rhs.mm) laerror("incompatible matrices");
|
|
#endif
|
|
SAME_LOC(*this, rhs);
|
|
NOT_GPU(*this);
|
|
|
|
copyonwrite();// ensure that *count == 1
|
|
#ifdef MATPTR
|
|
for (register size_t i=0; i< (size_t)nn*mm; i++) v[0][i] *= rhs.v[0][i];
|
|
#else
|
|
const size_t Dim = (size_t)nn*mm;
|
|
for(register size_t i=0; i<Dim; i++) v[i] *= rhs.v[i];
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* performs memory movements in CUDA mode
|
|
* @param[in] dest memory destination
|
|
* @see count, location
|
|
******************************************************************************/
|
|
#ifdef CUDALA
|
|
template<typename T>
|
|
void NRMat<T>::moveto(const GPUID dest) {
|
|
if(location == dest) return;// no operation is necessary
|
|
/*
|
|
currently, only movements between CPU and GPU are implemented
|
|
CUBLAS seems to lack support for multiple GPUs
|
|
*/
|
|
CPU_GPU(location, dest);
|
|
location = dest;
|
|
|
|
if(v && !count) laerror("internal inconsistency of reference counting 1");
|
|
if (!count) return;
|
|
|
|
if(v && *count==0) laerror("internal inconsistency of reference counting 2");
|
|
if(!v) return;
|
|
|
|
T *vold = v;
|
|
|
|
if(dest == cpu){ //moving from GPU to CPU
|
|
v = new T[(size_t)nn*mm];
|
|
gpuget((size_t)nn*mm, sizeof(T), vold, v);
|
|
if(*count == 1){ gpufree(vold); }
|
|
else{ --(*count); count = new int(1); }
|
|
|
|
}else{ //moving from CPU to GPU
|
|
v = (T *) gpualloc((size_t)nn*mm*sizeof(T));
|
|
gpuput((size_t)nn*mm, sizeof(T), vold, v);
|
|
if(*count == 1) delete[] vold;
|
|
else{ --(*count); count = new int(1);}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
|
|
/***************************************************************************//**
|
|
* add a given general matrix (type T) \f$A\f$ to the current complex matrix
|
|
* @param[in] rhs matrix \f$A\f$ of type T
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T> & NRMat<T>::operator+=(const NRMat<T> &rhs) {
|
|
#ifdef DEBUG
|
|
if (nn != rhs.nn || mm != rhs.mm) laerror("incompatible matrices");
|
|
#endif
|
|
SAME_LOC(*this, rhs);
|
|
NOT_GPU(*this);
|
|
|
|
copyonwrite();
|
|
|
|
#ifdef MATPTR
|
|
for(size_t i=0; i< (size_t)nn*mm; i++) v[0][i] += rhs.v[0][i];
|
|
#else
|
|
for(size_t i=0; i< (size_t)nn*mm; i++) v[i] += rhs.v[i];
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
|
|
/***************************************************************************//**
|
|
* subtract a given general matrix (type T) \f$A\f$ from the current matrix
|
|
* @param[in] rhs matrix \f$A\f$ of type T
|
|
* @return reference to the modified matrix
|
|
******************************************************************************/
|
|
template <typename T>
|
|
NRMat<T> & NRMat<T>::operator-=(const NRMat<T> &rhs) {
|
|
#ifdef DEBUG
|
|
if (nn != rhs.nn || mm != rhs.mm) laerror("incompatible matrices");
|
|
#endif
|
|
SAME_LOC(*this, rhs);
|
|
NOT_GPU(*this);
|
|
|
|
copyonwrite();
|
|
|
|
#ifdef MATPTR
|
|
for(size_t i=0; i< (size_t)nn*mm; i++) v[0][i] -= rhs.v[0][i];
|
|
#else
|
|
for(size_t i=0; i<(size_t) nn*mm; i++) v[i] -= rhs.v[i];
|
|
#endif
|
|
return *this;
|
|
}
|
|
|
|
|
|
/*difference of absolute values*/
|
|
template <typename T>
|
|
NRMat<typename LA_traits<T>::normtype> NRMat<T>::diffabs(const NRMat<T> &rhs) const {
|
|
#ifdef DEBUG
|
|
if (nn != rhs.nn ||mm!=rhs.mm) laerror("incompatible dimensions");
|
|
#endif
|
|
NOT_GPU(*this);
|
|
NOT_GPU(rhs);
|
|
|
|
NRMat<typename LA_traits<T>::normtype> r(nn,mm);
|
|
for(size_t i=0; i< nn; i++) for(size_t j=0; j< mm; j++) r(i,j) = MYABS((*this)(i,j)) - MYABS(rhs(i,j));
|
|
return r;
|
|
}
|
|
|
|
/*elementwise absolute values*/
|
|
template <typename T>
|
|
NRMat<typename LA_traits<T>::normtype> NRMat<T>::abs() const {
|
|
NOT_GPU(*this);
|
|
|
|
NRMat<typename LA_traits<T>::normtype> r(nn,mm);
|
|
for(size_t i=0; i< nn; i++) for(size_t j=0; j< mm; j++) r(i,j) = MYABS((*this)(i,j));
|
|
return r;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/***************************************************************************//**
|
|
* generate operators: Mat + a, a + Mat, Mat * a
|
|
* corresponding macro is defined in vec.h
|
|
******************************************************************************/
|
|
NRVECMAT_OPER(Mat, +)
|
|
NRVECMAT_OPER(Mat, -)
|
|
NRVECMAT_OPER(Mat, *)
|
|
|
|
/***************************************************************************//**
|
|
* generate Mat + Mat, Mat - Mat
|
|
* corresponding macro is defined in vec.h
|
|
******************************************************************************/
|
|
NRVECMAT_OPER2(Mat, +)
|
|
NRVECMAT_OPER2(Mat, -)
|
|
|
|
}//end of the LA-namespace
|
|
#endif/* _LA_MAT_H_ */
|
|
|