LA_library/mat.h

1555 lines
49 KiB
C
Raw Normal View History

2010-09-08 18:27:58 +02:00
/* vim: set ts=8 sw=8 sts=8 noexpandtab cindent: */
/*******************************************************************************
2008-02-26 14:55:23 +01:00
LA: linear algebra C++ interface library
Copyright (C) 2008 Jiri Pittner <jiri.pittner@jh-inst.cas.cz> or <jiri@pittnerovi.com>
complex versions written by Roman Curik <roman.curik@jh-inst.cas.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
2010-09-08 18:27:58 +02:00
*******************************************************************************/
2004-03-17 04:07:21 +01:00
#ifndef _LA_MAT_H_
#define _LA_MAT_H_
2005-02-14 01:10:07 +01:00
#include "la_traits.h"
2004-03-17 04:07:21 +01:00
2009-11-12 22:01:19 +01:00
namespace LA {
2010-09-08 18:27:58 +02:00
//forward declaration
template<typename T> class NRPerm;
2021-05-14 17:39:22 +02:00
template<typename T> class CyclePerm;
template<typename T> class NRMat_from1;
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* \brief NRMat<T> class template implementing the matrix interface
* @see NRVec<T>, NRSMat<T>
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
class NRMat{
2004-03-17 04:07:21 +01:00
protected:
2010-09-08 18:27:58 +02:00
int nn;//!< number of rows
int mm;//!< number of columns
2004-03-17 04:07:21 +01:00
#ifdef MATPTR
2010-09-08 18:27:58 +02:00
T **v;//!< pointer to the array of pointers pointing to the beginings of individual rows
2004-03-17 04:07:21 +01:00
#else
2010-09-08 18:27:58 +02:00
T *v;//!< pointer to the data stored continuously in emmory
2004-03-17 04:07:21 +01:00
#endif
2010-09-08 18:27:58 +02:00
int *count;//!< reference counter
2013-11-04 15:56:39 +01:00
public:
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
GPUID location;
2010-06-25 17:28:19 +02:00
#endif
2004-03-17 04:07:21 +01:00
friend class NRVec<T>;
friend class NRSMat<T>;
2019-11-13 23:22:25 +01:00
friend class NRMat_from1<T>;
friend class NRSMat_from1<T>;
2004-03-17 04:07:21 +01:00
2010-09-08 18:27:58 +02:00
//! standard destructor
~NRMat();
/***************************************************************************//**
* \brief inlined constructor creating zero matrix of general type <code>T</code>
******************************************************************************/
inline NRMat() : nn(0), mm(0), v(0), count(0){
#ifdef CUDALA
location = DEFAULT_LOC;
#endif
};
/***************************************************************************//**
* \brief Inlined constructor creating matrix of given size and location.
* Because of performance reasons, no incialization is done.
* @param[in] n vector size (count of elements)
* @param[in] loc location of the underlying data (CPU/GPU)
******************************************************************************/
inline NRMat(const int n, const int m, const GPUID loc = undefined);
//! inlined constructor creating matrix of given size filled with prescribed value and stored at given location
inline NRMat(const T &a, const int n, const int m, const GPUID loc);
//! inlined constructor creating matrix of given size filled with prescribed value
2004-03-17 04:07:21 +01:00
inline NRMat(const T &a, const int n, const int m);
2010-09-08 18:27:58 +02:00
//! inlined constructor creating matrix of given size filled with data located at given memory location
2004-03-17 04:07:21 +01:00
NRMat(const T *a, const int n, const int m);
2010-09-08 18:27:58 +02:00
//! inlined constructor creating matrix of given size from an array
template<int R, int C> inline NRMat(const T (&a)[R][C]);
2010-09-08 18:27:58 +02:00
//! inlined copy-constructor
2004-03-17 04:07:21 +01:00
inline NRMat(const NRMat &rhs);
2010-09-08 18:27:58 +02:00
//! complexifying constructor
NRMat(const typename LA_traits_complex<T>::NRMat_Noncomplex_type &rhs, bool imagpart = false);
2011-01-18 15:37:05 +01:00
//! explicit decomplexifying constructor
2021-04-21 15:04:37 +02:00
explicit NRMat(const NRMat<std::complex<T> > &rhs, bool imagpart = false);
2010-09-08 18:27:58 +02:00
//! explicit constructor converting symmetric matrix stored in packed form into a <code>NRMat<T></code> object
2004-03-17 04:07:21 +01:00
explicit NRMat(const NRSMat<T> &rhs);
2010-09-08 18:27:58 +02:00
//! explicit constructor converting vector into a <code>NRMat<T></code> object
2005-11-20 14:46:00 +01:00
#ifdef MATPTR
2010-09-08 18:27:58 +02:00
explicit NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset = 0):NRMat(&rhs[0][0] + offset , n, m){
2013-11-04 15:56:39 +01:00
if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
2010-09-08 18:27:58 +02:00
};
2005-11-20 14:46:00 +01:00
#else
2010-09-08 18:27:58 +02:00
explicit NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset = 0);
2004-03-17 04:07:21 +01:00
#endif
2010-09-08 18:27:58 +02:00
2005-02-14 01:10:07 +01:00
#ifdef MATPTR
2013-11-04 15:56:39 +01:00
const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v[0],rhs.v[0],(size_t)nn*mm);} //memcmp for scalars else elementwise
2005-02-14 01:10:07 +01:00
#else
2013-11-04 15:56:39 +01:00
const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v,rhs.v,(size_t)nn*mm);} //memcmp for scalars else elementwise
2005-02-14 01:10:07 +01:00
#endif
2010-09-08 18:27:58 +02:00
2005-02-14 01:10:07 +01:00
const bool operator==(const NRMat &rhs) const {return !(*this != rhs);};
2010-09-08 18:27:58 +02:00
//! determine the count of references to this object
2004-03-17 04:07:21 +01:00
inline int getcount() const {return count?*count:0;}
2010-09-08 18:27:58 +02:00
//! ensure that the data of this matrix are referenced exactly once
2013-11-04 15:56:39 +01:00
void copyonwrite(bool detachonly=false);
2010-09-08 18:27:58 +02:00
//! permute matrix elements
2021-05-19 22:29:47 +02:00
const NRMat permuted_rows(const NRPerm<int> &p, const bool inverse=false) const;
const NRMat permuted_cols(const NRPerm<int> &p, const bool inverse=false) const;
const NRMat permuted_both(const NRPerm<int> &p, const NRPerm<int> &q, const bool inverse=false) const;
void permuteme_rows(const CyclePerm<int> &p); //in place
void permuteme_cols(const CyclePerm<int> &p); //in place
void scale_row(const int i, const T f); //in place
void scale_col(const int i, const T f); //in place
void axpy(const T alpha, const NRPerm<int> &p, const bool direction);
2021-05-19 22:29:47 +02:00
explicit NRMat(const NRPerm<int> &p, const bool direction); //permutation matrix
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* routines for CUDA related stuff
* \li <code>getlocation()</code> gets the protected data member location
* \li <code>moveto(const GPUID)</code> moves underlying data between CPU/GPU memory
******************************************************************************/
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
inline GPUID getlocation() const {return location;}
void moveto(const GPUID dest);
#else
inline GPUID getlocation() const {return cpu;}
void moveto(const GPUID dest) {};
#endif
2010-09-08 18:27:58 +02:00
//! fill the matrix with pseudorandom numbers (uniform distribution)
void randomize(const typename LA_traits<T>::normtype &x);
//! assigment operator performing shallow copy
NRMat & operator=(const NRMat &rhs);
//! assigment operator performing deep copy
NRMat & operator|=(const NRMat &rhs);
//! assign scalar value to the diagonal elements
NRMat & operator=(const T &a);
//! add scalar value to the diagonal elements
NRMat & operator+=(const T &a);
//! subtract scalar value to the diagonal elements
NRMat & operator-=(const T &a);
//! multiply by a scalar value
NRMat & operator*=(const T &a);
//! add given matrix
2004-03-17 04:07:21 +01:00
NRMat & operator+=(const NRMat &rhs);
2010-09-08 18:27:58 +02:00
//! subtract given matrix
2004-03-17 04:07:21 +01:00
NRMat & operator-=(const NRMat &rhs);
2010-09-08 18:27:58 +02:00
//! Hadamard element-wise product
NRMat & operator^=(const NRMat<T> &rhs);
//! add symmetric matrix stored in packed form
2004-03-17 04:07:21 +01:00
NRMat & operator+=(const NRSMat<T> &rhs);
2010-09-08 18:27:58 +02:00
//! subtract symmetric matrix stored in packed form
2004-03-17 04:07:21 +01:00
NRMat & operator-=(const NRSMat<T> &rhs);
2010-09-08 18:27:58 +02:00
//! unary minus
const NRMat operator-() const;
//! add scalar value to all matrix elements and return the result by value
2004-03-17 04:07:21 +01:00
inline const NRMat operator+(const T &a) const;
2010-09-08 18:27:58 +02:00
//! subtract scalar value from all matrix elements and return the result by value
2004-03-17 04:07:21 +01:00
inline const NRMat operator-(const T &a) const;
2010-09-08 18:27:58 +02:00
//! multiply all matrix elements by a scalar value and return the result by value
2004-03-17 04:07:21 +01:00
inline const NRMat operator*(const T &a) const;
2010-09-08 18:27:58 +02:00
//! add given matrix and return the result by value
2004-03-17 04:07:21 +01:00
inline const NRMat operator+(const NRMat &rhs) const;
2010-09-08 18:27:58 +02:00
//! add given symmetric matrix stored in packed form and return the result by value
2004-03-17 04:07:21 +01:00
inline const NRMat operator+(const NRSMat<T> &rhs) const;
2010-09-08 18:27:58 +02:00
//! subtract given matrix and return the result by value
inline const NRMat operator-(const NRMat &rhs) const;
//! subtract given symmetric matrix stored in packed form and return the result by value
2004-03-17 04:07:21 +01:00
inline const NRMat operator-(const NRSMat<T> &rhs) const;
2010-09-08 18:27:58 +02:00
//! multiply by given matrix and return the result by value
const NRMat operator*(const NRMat &rhs) const;
//! multiply by given symmetric matrix stored in packed form and return the result by value
const NRMat operator*(const NRSMat<T> &rhs) const;
//! direct sum of two matrices
const NRMat operator&(const NRMat &rhs) const;
//! direct product of two matrices
const NRMat operator|(const NRMat<T> &rhs) const;
//! multiply by a vector
const NRVec<T> operator*(const NRVec<T> &rhs) const {
NRVec<T> result(nn, rhs.getlocation());
result.gemv((T)0, *this, 'n', (T)1, rhs);
return result;
};
//! multiply this matrix of general type <code>T</code> by vector of type <code>complex<T></code>
2021-04-21 15:04:37 +02:00
const NRVec<std::complex<T> > operator*(const NRVec<std::complex<T> > &rhs) const {
NRVec<std::complex<T> > result(nn, rhs.getlocation());
2010-09-08 18:27:58 +02:00
result.gemv((T)0, *this, 'n', (T)1, rhs);
return result;
};
//! inner product of two matrices (taking conjugation into account in the complex case)
const T dot(const NRMat &rhs) const;
//! direct sum
const NRMat oplus(const NRMat &rhs) const;
//! direct product
const NRMat otimes(const NRMat &rhs, bool reversecolumns = false) const;
//! multiply by diagonal matrix from left
void diagmultl(const NRVec<T> &rhs);
//! multiply by diagonal matrix from right
void diagmultr(const NRVec<T> &rhs);
//! for this matrix \f$A\f$ compute \f$A^\mathrm{T}\cdot{}A\f$
const NRSMat<T> transposedtimes() const;
//! for this matrix \f$A\f$ compute \f$A\cdot{}A^\mathrm{T}\f$
const NRSMat<T> timestransposed() const;
//! sum the rows
const NRVec<T> rsum() const;
//! sum the columns
const NRVec<T> csum() const;
//! orthonormalize this matrix
void orthonormalize(const bool rowcol, const NRSMat<T> *metric = NULL);
//! get the i<sup>th</sup> row
const NRVec<T> row(const int i, int l = -1) const;
//! get the j<sup>th</sup> column
const NRVec<T> column(const int j, int l = -1) const {
NOT_GPU(*this);
if(l < 0) l = nn;
NRVec<T> r(l);
for(register int i=0; i<l; ++i) r[i] = (*this)(i,j);
return r;
};
//! extract the digonal elements of this matrix and store them into a vector
const T* diagonalof(NRVec<T> &, const bool divide = 0, bool cache = false) const;
//! set diagonal elements
void diagonalset(const NRVec<T> &);
//! perform the <b>gemv</b> operation with vector of type <code>T</code>
void gemv(const T beta, NRVec<T> &r, const char trans, const T alpha, const NRVec<T> &x) const { r.gemv(beta, *this, trans, alpha, x); };
//! perform the <b>gemv</b> operation with vector of type <code>complex<T></code>
2021-04-21 15:04:37 +02:00
void gemv(const T beta, NRVec<std::complex<T> > &r, const char trans, const T alpha, const NRVec<std::complex<T> > &x) const { r.gemv(beta, *this, trans, alpha, x); };
2010-09-08 18:27:58 +02:00
//! determine the pointer to the i<sup>th</sup> row
inline T* operator[](const int i);
//! determine the const pointer to the i<sup>th</sup> row
2004-03-17 04:07:21 +01:00
inline const T* operator[](const int i) const;
2010-09-08 18:27:58 +02:00
//! get the reference to the element with indices (i,j)
inline T& operator()(const int i, const int j);
//! get the const reference to the element with indices (i,j)
2004-03-17 04:07:21 +01:00
inline const T& operator()(const int i, const int j) const;
2010-09-08 18:27:58 +02:00
//! get the copy of the element with indices (i,j)
inline const T get_ij(const int i, const int j) const;
//! get the number of rows
2004-03-17 04:07:21 +01:00
inline int nrows() const;
2010-09-08 18:27:58 +02:00
//! get the number of columns
2004-03-17 04:07:21 +01:00
inline int ncols() const;
2010-09-08 18:27:58 +02:00
//! get the number of matrix elements
2013-11-04 15:56:39 +01:00
inline size_t size() const;
2010-09-08 18:27:58 +02:00
//! unformatted input
void get(int fd, bool dimensions = 1, bool transposed = false);
//! unformatted output
void put(int fd, bool dimensions = 1, bool transposed = false) const;
//! formatted output
void fprintf(FILE *f, const char *format, const int modulo) const;
//! formatted input
void fscanf(FILE *f, const char *format);
//! set all matrix elements equal to zero
void clear(){
if(nn&&mm){
2021-05-24 18:45:58 +02:00
copyonwrite(LA_traits<T>::is_plaindata());
2013-11-04 15:56:39 +01:00
LA_traits<T>::clear((*this)[0], (size_t)nn*mm);
2010-09-08 18:27:58 +02:00
}
};
//! resize the matrix
2008-03-14 16:39:45 +01:00
void resize(int n, int m);
2010-09-08 18:27:58 +02:00
2011-01-18 15:37:05 +01:00
//! deallocate the matrix
void dealloc(void) {resize(0,0);}
2010-09-08 18:27:58 +02:00
//! get the pointer to the data
inline operator T*();
2021-04-21 15:04:37 +02:00
2010-09-08 18:27:58 +02:00
//! get the const pointer to the data
2004-03-17 04:07:21 +01:00
inline operator const T*() const;
2010-09-08 18:27:58 +02:00
//! in case of square matrix, transpose the leading minor of order <code>n</code>
NRMat& transposeme(const int n = 0);
//! conjugate a square matrix
NRMat& conjugateme();
//! transpose this matrix and return the result by value
const NRMat transpose(bool conj = false) const;
//! conjugate this matrix and return the result by value
2004-03-17 04:07:21 +01:00
const NRMat conjugate() const;
2010-09-08 18:27:58 +02:00
//! extract specified submatrix
const NRMat submatrix(const int fromrow, const int torow, const int fromcol, const int tocol) const;
//! store given matrix at given position into the current matrix
void storesubmatrix(const int fromrow, const int fromcol, const NRMat &rhs);
//! perform the \b gemm operation
void gemm(const T &beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T &alpha);
//! compute the norm of this matrix
const typename LA_traits<T>::normtype norm(const T scalar = (T)0) const;
//! add up a scalar multiple of given matrix to the current matrix
void axpy(const T alpha, const NRMat &x);
//! maximal element in the absolute value
2004-03-17 04:07:21 +01:00
inline const T amax() const;
2010-09-08 18:27:58 +02:00
//! minimal element in the absolute value
inline const T amin() const;
//! determine the sum of the diagonal elements
2004-03-17 04:07:21 +01:00
const T trace() const;
2010-09-08 18:27:58 +02:00
//! swap the order of the rows of the current matrix
NRMat & swap_rows();
//! swap the order of the columns of the current matrix
NRMat & swap_cols();
//! swap the order of the rows and columns of the current matrix
NRMat & swap_rows_cols();
2018-11-07 19:21:59 +01:00
// LV - swapping of columns i and j
NRMat & swap_cols(const int i, const int j);
// LV - swapping of rows i and j
NRMat & swap_rows(const int i, const int j);
2021-05-19 22:29:47 +02:00
//rotate rows or columns through an angle
NRMat & rotate_cols(const int i, const int j, const T phi);
NRMat & rotate_rows(const int i, const int j, const T phi);
2010-09-08 18:27:58 +02:00
//! multiply by sparse matrix
2010-01-11 11:12:28 +01:00
SparseSMat<T> operator*(const SparseSMat<T> &rhs) const;
2010-09-08 18:27:58 +02:00
//! explicit constructor converting sparse matrix into \c NRMat<T> object
2004-03-17 04:07:21 +01:00
explicit NRMat(const SparseMat<T> &rhs); // dense from sparse
2010-09-08 18:27:58 +02:00
//! explicit constructor converting sparse symmetric matrix into \c NRMat<T> object
explicit NRMat(const SparseSMat<T> &rhs);
2011-01-18 15:37:05 +01:00
//! explicit constructor converting sparse CSR matrix into \c NRMat<T> object
explicit NRMat(const CSRMat<T> &rhs);
2010-09-08 18:27:58 +02:00
//! add up given sparse matrix
2004-03-17 04:07:21 +01:00
NRMat & operator+=(const SparseMat<T> &rhs);
2010-09-08 18:27:58 +02:00
//! subtract given sparse matrix
2004-03-17 04:07:21 +01:00
NRMat & operator-=(const SparseMat<T> &rhs);
2010-09-08 18:27:58 +02:00
//! perform the \b gemm operation
void gemm(const T &beta, const SparseMat<T> &a, const char transa, const NRMat &b, const char transb, const T &alpha);
inline void simplify() {};
bool issymmetric() const { return 0; };
2018-11-07 19:21:59 +01:00
const typename LA_traits<T>::normtype nonsymmetry() const;
const typename LA_traits<T>::normtype nonhermiticity() const;
2005-10-12 13:14:55 +02:00
#ifndef NO_STRASSEN
2010-09-08 18:27:58 +02:00
//! Strassen's multiplication (better than \f$\mathacal{O}(n^3)\f$, analogous syntax to \see NRMat<T>::gemm() )
void strassen(const T beta, const NRMat &a, const char transa, const NRMat &b, const char transb, const T alpha);
2004-03-17 04:07:21 +01:00
void s_cutoff(const int,const int,const int,const int) const;
2005-10-12 13:14:55 +02:00
#endif
void swap(NRMat &rhs) //more efficient swap than via tmp and constructors and operator=
{
int tmpnn=nn; nn=rhs.nn; rhs.nn=tmpnn;
int tmpmm=mm; mm=rhs.mm; rhs.mm=tmpmm;
#ifdef MATPTR
T **
#else
T *
#endif
tmpv=v; v=rhs.v; rhs.v=tmpv;
int *tmpcount=count; count=rhs.count; rhs.count=tmpcount;
#ifdef CUDALA
GPUID tmplocation=location; location=rhs.location; rhs.location=tmplocation;
#endif
}
NRMat<typename LA_traits<T>::normtype> abs() const; //elementwise absolute values
NRMat<typename LA_traits<T>::normtype> diffabs(const NRMat &rhs) const; //difference of absolute values
2004-03-17 04:07:21 +01:00
};
2009-11-12 22:01:19 +01:00
}//namespace
2010-09-08 18:27:58 +02:00
2005-02-18 23:08:15 +01:00
//due to mutual includes this has to be after full class declaration
#include "vec.h"
#include "smat.h"
#include "sparsemat.h"
2010-01-07 17:10:12 +01:00
#include "sparsesmat.h"
#include "permutation.h"
2005-02-18 23:08:15 +01:00
2009-11-12 22:01:19 +01:00
namespace LA {
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* matrix constructor
* @param[in] n number of rows of the matrix being created
* @param[in] m number of cols of the matrix being created
* @param[in] loc location for storing the matrix
* @see count, v, location
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
NRMat<T>::NRMat(const int n, const int m, const GPUID loc) : nn(n), mm(m), count(new int) {
T* p;
2004-03-17 04:07:21 +01:00
*count = 1;
2013-11-04 15:56:39 +01:00
const size_t nm = (size_t)n*m;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
location = (loc==undefined?DEFAULT_LOC:loc);
if(location == cpu) {
#endif
#ifdef MATPTR
v = new T*[n];
p = v[0] = new T[nm];
for (int i=1; i<n; i++) v[i] = v[i-1] + m;
#else
p = v = new T[nm];
#endif
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
}else{
const T val = 0;
v = (T*) gpualloc(nm*sizeof(T));
2010-06-25 17:28:19 +02:00
}
#endif
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* matrix constructor
* @param[in] a value of type T intended for matrix inicialization
* @param[in] n number of rows of the matrix being created
* @param[in] m number of cols of the matrix being created
* @see count, v
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
NRMat<T>::NRMat(const T &a, const int n, const int m, const GPUID loc) : nn(n), mm(m), count(new int) {
2013-11-04 15:56:39 +01:00
const size_t nm = (size_t)n*m;
2010-09-08 18:27:58 +02:00
T *p;
*count = 1;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
location = (loc==undefined?DEFAULT_LOC:loc);
if(location==cpu){
#endif
#ifdef MATPTR
v = new T*[n];
p = v[0] = new T[nm];
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
#else
p = v = new T[nm];
#endif
if (!LA_traits<T>::is_plaindata() || a != (T)0){
2010-09-08 18:27:58 +02:00
for (register int i=0; i<nm; i++) *p++ = a;
}else{
memset(p, 0, nm*sizeof(T));
}
#ifdef CUDALA
}else{
if(sizeof(T)%sizeof(float) != 0)laerror("memory alignment error");
v = (T*)gpualloc(nm*sizeof(T));
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
2010-09-08 18:27:58 +02:00
smart_gpu_set(nm, a, v);
}
2010-06-25 17:28:19 +02:00
#endif
2010-09-08 18:27:58 +02:00
}
2010-06-25 17:28:19 +02:00
/***************************************************************************//**
* inline constructor creating vector from an array
******************************************************************************/
template <typename T> template<int R, int C>
inline NRMat<T>::NRMat(const T (&a)[R][C]) : count(new int) {
nn = R;
mm = C;
*count = 1;
#ifdef CUDALA
if(location==cpu){
#endif
#ifdef MATPTR
v = new T*[nn];
v[0] = new T[nn*mm];
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
if(LA_traits<T>::is_plaindata()) memcpy(v[0], a, nn*mm*sizeof(T));
else for( int i=0; i<nn; i++) for(int j=0; j<mm; ++j) v[i][j]=a[i][j];
#else
v = new T[nn*mm];
if(LA_traits<T>::is_plaindata()) memcpy(v, a, nn*mm*sizeof(T));
else for( int i=0; i<nn; i++) for(int j=0; j<mm; ++j) v[i*mm+j] = a[i][j];
#endif
#ifdef CUDALA
}else{
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
v = (T*) gpualloc(nn*mm*sizeof(T));
cublasSetVector(nm, sizeof(T), a, 1, v, 1);
}
#endif
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* matrix constructor
* @param[in] a value of type T intended for matrix inicialization
* @param[in] n number of rows of the matrix being created
* @param[in] m number of cols of the matrix being created
* @see count, v
******************************************************************************/
template <typename T>
NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new int) {
2013-11-04 15:56:39 +01:00
const size_t nm = (size_t)n*m;
2004-03-17 04:07:21 +01:00
T *p;
*count = 1;
2010-09-08 18:27:58 +02:00
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
location = DEFAULT_LOC;
if(location==cpu){
#endif
#ifdef MATPTR
v = new T*[n];
p = v[0] = new T[nm];
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
#else
2013-11-04 15:56:39 +01:00
p = v = new T[nm];
2010-09-08 18:27:58 +02:00
#endif
if (!LA_traits<T>::is_plaindata() || a != (T)0)
2010-09-08 18:27:58 +02:00
for (register int i=0; i<nm; i++) *p++ = a;
2004-03-17 04:07:21 +01:00
else
2010-09-08 18:27:58 +02:00
memset(p, 0, nm*sizeof(T));
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
}else{
v = (T*)gpualloc(nm*sizeof(T));
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
2010-09-08 18:27:58 +02:00
smart_gpu_set(nm, a, v);
2010-06-25 17:28:19 +02:00
}
#endif
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* matrix constructor
* @param[in] a pointer to values of type T intended for matrix inicialization
* @param[in] n number of rows of the matrix being created
* @param[in] m number of cols of the matrix being created
* @see count, v
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
NRMat<T>::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new int) {
2013-11-04 15:56:39 +01:00
const size_t nm = (size_t)n*m;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
location = DEFAULT_LOC;
2010-06-25 17:28:19 +02:00
#endif
2004-03-17 04:07:21 +01:00
*count = 1;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
if(location==cpu){
#endif
#ifdef MATPTR
v = new T*[n];
v[0] = new T[nm];
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
if(LA_traits<T>::is_plaindata()) memcpy(v[0], a, nm*sizeof(T));
else for(int i=0; i<nm; ++i) v[0][i] = a[i];
2010-09-08 18:27:58 +02:00
#else
v = new T[nm];
if(LA_traits<T>::is_plaindata()) memcpy(v, a, nm*sizeof(T));
else for(int i=0; i<nm; ++i) v[i] = a[i];
2010-09-08 18:27:58 +02:00
#endif
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
}else{
v = (T*) gpualloc(nm*sizeof(T));
if(!LA_traits<T>::is_plaindata()) laerror("only implemented for plain data");
2010-09-08 18:27:58 +02:00
cublasSetVector(nm, sizeof(T), a, 1, v, 1);
}
2010-06-25 17:28:19 +02:00
#endif
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* copy constructor implementing shallow copy
* @param[in] rhs reference object to be copied
* @see count, v
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
NRMat<T>::NRMat(const NRMat &rhs) {
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
location = rhs.location;
2010-06-25 17:28:19 +02:00
#endif
2004-03-17 04:07:21 +01:00
nn = rhs.nn;
mm = rhs.mm;
count = rhs.count;
v = rhs.v;
if (count) ++(*count);
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* create matrix from a \c NRSMat object
* @param[in] rhs \c NRSMat input object to be converted
* @see count, v, vec.h, NRSMat<T>
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
NRMat<T>::NRMat(const NRSMat<T> &rhs) {
NOT_GPU(rhs);
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
location = rhs.location;
2010-06-25 17:28:19 +02:00
#endif
2010-09-08 18:27:58 +02:00
int i(0), j(0), k(0);
2004-03-17 04:07:21 +01:00
nn = mm = rhs.nrows();
2019-11-13 23:22:25 +01:00
count = new int;
2004-03-17 04:07:21 +01:00
*count = 1;
#ifdef MATPTR
v = new T*[nn];
2013-11-04 15:56:39 +01:00
v[0] = new T[(size_t)mm*nn];
2004-03-17 04:07:21 +01:00
for (int i=1; i<nn; i++) v[i] = v[i-1] + mm;
#else
2013-11-04 15:56:39 +01:00
v = new T[(size_t)mm*nn];
2004-03-17 04:07:21 +01:00
#endif
#ifdef MATPTR
2010-09-08 18:27:58 +02:00
for (i=0; i<nn; i++){
for (j=0; j<=i; j++){
v[i][j] = v[j][i] = rhs[k++];
}
}
2004-03-17 04:07:21 +01:00
#else
2010-09-08 18:27:58 +02:00
for (i=0; i<nn; i++){
for (j=0; j<=i; j++){
2013-11-04 15:56:39 +01:00
v[i*(size_t)nn + j] = v[j*(size_t)nn + i] = rhs[k++];
2010-09-08 18:27:58 +02:00
}
}
2004-03-17 04:07:21 +01:00
#endif
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* create matrix from a vector (shallow copy)
* @param[in] rhs NRVec vector containing the data
* @param[in] n number of rows of the matrix being created
* @param[in] m number of cols of the matrix being created
* @see count, v, vec.h
******************************************************************************/
2004-03-17 04:07:21 +01:00
#ifndef MATPTR
template <typename T>
2009-01-20 15:39:50 +01:00
NRMat<T>::NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset)
2004-03-17 04:07:21 +01:00
{
2013-11-04 15:56:39 +01:00
if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
2018-11-07 19:21:59 +01:00
if(offset!=0) std::cout << "dangerous: if the underlying vector is deallocated before the matrix, wrong delete[] will result for nonzero offset!!!\n";
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
location=rhs.location;
2010-06-25 17:28:19 +02:00
#endif
2004-03-17 04:07:21 +01:00
nn = n;
mm = m;
count = rhs.count;
2010-09-08 18:27:58 +02:00
v = rhs.v + offset;//!< make just shallow copy
(*count)++;//!< therefore increase the reference counter
2004-03-17 04:07:21 +01:00
}
#endif
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* \c NRMat + \c NRSmat via operator +=
* @param[in] rhs NRSMat matrix to be subtracted from current matrix
* @return result of the subtraction
* @see NRMat<T>::operator+=(const NRSMat<T> &)
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline const NRMat<T> NRMat<T>::operator+(const NRSMat<T> &rhs) const {
2004-03-17 04:07:21 +01:00
return NRMat<T>(*this) += rhs;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* \c NRMat - \c NRSmat via operator -=
* @param[in] rhs NRSMat matrix to be subtracted from current matrix
* @return result of the subtraction
* @see NRMat<T>::operator-=(const NRSMat<T> &)
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline const NRMat<T> NRMat<T>::operator-(const NRSMat<T> &rhs) const {
2004-03-17 04:07:21 +01:00
return NRMat<T>(*this) -= rhs;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* @param[in] i row number
* @return pointer to the first element in the i-th row
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline T* NRMat<T>::operator[](const int i) {
2004-03-17 04:07:21 +01:00
#ifdef DEBUG
2010-09-08 18:27:58 +02:00
if (_LA_count_check && *count != 1) laerror("matrix with *count>1 used as l-value");
if (i < 0 || i >= nn) laerror("Mat [] out of range");
if (!v) laerror("unallocated matrix");
#endif
#ifdef MATPTR
return v[i];
#else
2013-11-04 15:56:39 +01:00
return v + i*(size_t)mm;
2010-09-08 18:27:58 +02:00
#endif
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* @param[in] i row number
* @return const pointer to the first element in the i-th row
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline const T* NRMat<T>::operator[](const int i) const {
2004-03-17 04:07:21 +01:00
#ifdef DEBUG
2010-09-08 18:27:58 +02:00
if (i < 0 || i >= nn) laerror("index out of range");
if (!v) laerror("unallocated matrix");
#endif
NOT_GPU(*this);
#ifdef MATPTR
return v[i];
#else
2013-11-04 15:56:39 +01:00
return v + i*(size_t)mm;
2010-09-08 18:27:58 +02:00
#endif
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* for a given matrix \f$A\f$, determine the element with indices (i,j)
* @param[in] i row number
* @param[in] j col number
* @return reference to \f$A_{i,j}\f$
* @see NRMat<T>::count
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline T& NRMat<T>::operator()(const int i, const int j){
2004-03-17 04:07:21 +01:00
#ifdef DEBUG
2010-09-08 18:27:58 +02:00
if (_LA_count_check && *count != 1) laerror("NRMat::operator(,) used as l-value for a matrix with count > 1");
if (i < 0 || i >= nn && nn > 0 || j < 0 || j >= mm && mm > 0) laerror("index out of range");
if (!v) laerror("unallocated matrix");
#endif
NOT_GPU(*this);
#ifdef MATPTR
return v[i][j];
#else
2013-11-04 15:56:39 +01:00
return v[i*(size_t)mm + j];
2010-09-08 18:27:58 +02:00
#endif
2004-03-17 04:07:21 +01:00
}
2010-06-25 17:28:19 +02:00
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* for a given matrix \f$A\f$, determine the element with indices (i,j)
* @param[in] i row number
* @param[in] j col number
* @return const reference to \f$A_{i,j}\f$
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline const T& NRMat<T>::operator()(const int i, const int j) const{
T ret;
2004-03-17 04:07:21 +01:00
#ifdef DEBUG
2010-09-08 18:27:58 +02:00
if (i<0 || i>=nn && nn>0 || j<0 || j>=mm && mm>0) laerror("index out of range");
if (!v) laerror("unallocated matrix");
#endif
NOT_GPU(*this);
#ifdef MATPTR
return v[i][j];
#else
2013-11-04 15:56:39 +01:00
return v[i*(size_t)mm + j];
2010-09-08 18:27:58 +02:00
#endif
}
/***************************************************************************//**
* for a given matrix \f$A\f$, determine the element with indices (i,j)
* @param[in] i row number
* @param[in] j col number
* @return const reference to \f$A_{i,j}\f$
******************************************************************************/
template <typename T>
inline const T NRMat<T>::get_ij(const int i, const int j) const{
T ret;
#ifdef DEBUG
if (i<0 || i>=nn || j<0 || j>=mm) laerror("index out of range");
if (!v) laerror("unallocated matrix");
2004-03-17 04:07:21 +01:00
#endif
2010-09-08 18:27:58 +02:00
#ifdef CUDALA
if(location == cpu){
#endif
#ifdef MATPTR
return v[i][j];
#else
2013-11-04 15:56:39 +01:00
return v[i*(size_t)mm + j];
2010-09-08 18:27:58 +02:00
#endif
#ifdef CUDALA
}else{
2013-11-04 15:56:39 +01:00
const size_t pozice = i*(size_t)mm + j;
2010-09-08 18:27:58 +02:00
gpuget(1, sizeof(T), v + pozice, &ret);
return ret;
}
2004-03-17 04:07:21 +01:00
#endif
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* @return number of rows
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline int NRMat<T>::nrows() const{
2004-03-17 04:07:21 +01:00
return nn;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* @return number of columns
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline int NRMat<T>::ncols() const{
2004-03-17 04:07:21 +01:00
return mm;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* @return number of elements
******************************************************************************/
2005-02-18 23:08:15 +01:00
template <typename T>
2013-11-04 15:56:39 +01:00
inline size_t NRMat<T>::size() const{
return (size_t)nn*mm;
2005-02-18 23:08:15 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* @return pointer of general type T to the underlying data structure
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline NRMat<T>::operator T*(){
#ifdef DEBUG
if (!v) laerror("unallocated matrix");
#endif
#ifdef MATPTR
return v[0];
#else
return v;
#endif
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* @return const pointer of general type T to the underlying data
******************************************************************************/
2004-03-17 04:07:21 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
inline NRMat<T>::operator const T*() const{
#ifdef DEBUG
if (!v) laerror("unallocated matrix");
#endif
#ifdef MATPTR
return v[0];
#else
return v;
#endif
}
/***************************************************************************//**
* for this real matrix \f$A\f$, determine the first element
* with largest absolute value
* @return \f$A_{l,m}\f$ which maximizes \f$\left|A_{i,j}\right|\f$
******************************************************************************/
template<>
inline const double NRMat<double>::amax() const{
#ifdef CUDALA
if(location == cpu){
2004-03-17 04:07:21 +01:00
#endif
2010-09-08 18:27:58 +02:00
#ifdef MATPTR
return v[0][cblas_idamax(nn*mm, v[0], 1) - 1];
#else
return v[cblas_idamax(nn*mm, v, 1) - 1];
#endif
#ifdef CUDALA
}else{
double ret(0.0);
2013-11-04 15:56:39 +01:00
const size_t pozice = cublasIdamax((size_t)nn*mm, v, 1) - 1;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS("cublasIdamax");
gpuget(1, sizeof(double), v + pozice, &ret);
return ret;
}
2004-03-17 04:07:21 +01:00
#endif
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* for this real matrix \f$A\f$, determine the first element
* with smallest absolute value
* @return \f$A_{l,m}\f$ which minimizes \f$\left|A_{i,j}\right|\f$
******************************************************************************/
2005-11-20 14:46:00 +01:00
template<>
2010-09-08 18:27:58 +02:00
inline const double NRMat<double>::amin() const{
double ret(0.0);
#ifdef CUDALA
if(location == cpu){
#endif
// idamin seems not to be supported
2013-11-04 15:56:39 +01:00
const size_t nm = (size_t)nn*mm;
2010-09-08 18:27:58 +02:00
double val(0.0);
int index(-1);
ret = std::numeric_limits<double>::max();
for(register int i=0; i < nm; i++){
#ifdef MATPTR
val = std::abs(v[0][i]);
#else
val = std::abs(v[i]);
#endif
if(val < ret){ index = i; ret = val; }
}
#ifdef MATPTR
ret = v[0][index];
#else
ret = v[index];
#endif
#ifdef CUDALA
}else{
2013-11-04 15:56:39 +01:00
const size_t pozice = cublasIdamin((size_t)nn*mm, v, 1) - 1;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS("cublasIdamin");
gpuget(1, sizeof(double), v + pozice, &ret);
}
2004-03-17 04:07:21 +01:00
#endif
2010-09-08 18:27:58 +02:00
return ret;
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* for this complex matrix \f$A\f$, determine the smallest index of the maximum
* magnitude element, i.e. maximal element in the 1-norm
* @return \f$A_{l,m}\f$ which maximizes \f$\left\{\left|\Re{}A_{i,j}\right|+\left|\Im{}A_{i,j}\right|\right}\f$
******************************************************************************/
2005-11-20 14:46:00 +01:00
template<>
2021-04-21 15:04:37 +02:00
inline const std::complex<double> NRMat<std::complex<double> >::amax() const{
2010-09-08 18:27:58 +02:00
#ifdef CUDALA
if(location == cpu){
2004-03-17 04:07:21 +01:00
#endif
2010-09-08 18:27:58 +02:00
#ifdef MATPTR
return v[0][cblas_izamax(nn*mm, v[0], 1) - 1];
#else
return v[cblas_izamax(nn*mm, v, 1) - 1];
#endif
#ifdef CUDALA
}else{
2021-04-21 15:04:37 +02:00
std::complex<double> ret(0.0, 0.0);
2013-11-04 15:56:39 +01:00
const size_t pozice = cublasIzamax((size_t)nn*mm, (cuDoubleComplex*)v, 1) - 1;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS("cublasIzamax");
2021-04-21 15:04:37 +02:00
gpuget(1, sizeof(std::complex<double>), v + pozice, &ret);
2010-09-08 18:27:58 +02:00
return ret;
}
#endif
2004-03-17 04:07:21 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* for this complex matrix \f$A\f$, determine the smallest index of the minimum
* magnitude element, i.e. minimal element in the 1-norm
* @return \f$A_{l,m}\f$ which minimizes \f$\left\{\left|\Re{}A_{i,j}\right|+\left|\Im{}A_{i,j}\right|\right}\f$
******************************************************************************/
template<>
2021-04-21 15:04:37 +02:00
inline const std::complex<double> NRMat<std::complex<double> >::amin() const{
std::complex<double> ret(0.0, 0.0);
2010-09-08 18:27:58 +02:00
#ifdef CUDALA
if(location == cpu){
#endif
// idamin seems not to be supported
2013-11-04 15:56:39 +01:00
const size_t nm = (size_t)nn*mm;
2010-09-08 18:27:58 +02:00
int index(-1);
double val(0.0), min_val(0.0);
2021-04-21 15:04:37 +02:00
std::complex<double> z_val(0.0, 0.0);
2010-09-08 18:27:58 +02:00
min_val = std::numeric_limits<double>::max();
for(register int i=0; i < nm; i++){
#ifdef MATPTR
z_val = v[0][i];
#else
z_val = v[i];
#endif
val = std::abs(z_val.real()) + std::abs(z_val.imag());
if(val < min_val){ index = i; min_val = val; }
}
#ifdef MATPTR
ret = v[0][index];
#else
ret = v[index];
#endif
#ifdef CUDALA
}else{
2013-11-04 15:56:39 +01:00
const size_t pozice = cublasIzamin((size_t)nn*mm, (cuDoubleComplex*)v, 1) - 1;
2010-09-08 18:27:58 +02:00
TEST_CUBLAS("cublasIzamin");
2021-04-21 15:04:37 +02:00
gpuget(1, sizeof(std::complex<double>), v + pozice, &ret);
2010-09-08 18:27:58 +02:00
}
#endif
return ret;
}
2004-03-17 04:07:21 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* destructor for general type
* @see NRMat<T>::count
******************************************************************************/
2004-03-17 17:39:07 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
NRMat<T>::~NRMat() {
if(!count) return;
if(--(*count) <= 0) {
if (v){
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
if(location == cpu){
2004-03-17 17:39:07 +01:00
#endif
2010-09-08 18:27:58 +02:00
#ifdef MATPTR
delete[] (v[0]);
#endif
delete[] v;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
}else{
gpufree(v);
2010-06-25 17:28:19 +02:00
}
#endif
2004-03-17 17:39:07 +01:00
}
delete count;
}
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* assigment operator for general type between NRMat and NRMat
* @see count
* @return reference to the newly assigned matrix
******************************************************************************/
2004-03-17 17:39:07 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
NRMat<T> & NRMat<T>::operator=(const NRMat<T> &rhs) {
if (this != &rhs){
if (count){
if (--(*count) ==0 ){
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
if(location == cpu){
2004-03-17 17:39:07 +01:00
#endif
2010-09-08 18:27:58 +02:00
#ifdef MATPTR
delete[] (v[0]);
#endif
delete[] v;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
}else{ gpufree(v); }
2010-06-25 17:28:19 +02:00
#endif
2010-09-08 18:27:58 +02:00
delete count;
2005-02-01 00:08:03 +01:00
}
2010-09-08 18:27:58 +02:00
}
2004-03-17 17:39:07 +01:00
v = rhs.v;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
location = rhs.location;
2010-06-25 17:28:19 +02:00
#endif
2004-03-17 17:39:07 +01:00
nn = rhs.nn;
mm = rhs.mm;
count = rhs.count;
2010-09-08 18:27:58 +02:00
if(count) (*count)++;
}
2004-03-17 17:39:07 +01:00
return *this;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* perform an explicit deep copy of \c NRMat object
* @see count
* @return reference to the newly copied matrix
******************************************************************************/
2004-03-17 17:39:07 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
NRMat<T> & NRMat<T>::operator|=(const NRMat<T> &rhs) {
if(this == &rhs) return *this; // test to avoid self-assignment
2010-06-25 17:28:19 +02:00
*this = rhs;
this->copyonwrite();
2004-03-17 17:39:07 +01:00
return *this;
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* create own deep copy
* @see NRMat<T>::count, NRMat<T>::operator|=()
******************************************************************************/
2004-03-17 17:39:07 +01:00
template <typename T>
2013-11-04 15:56:39 +01:00
void NRMat<T>::copyonwrite(bool detachonly) {
if(!count)
{
if(nn||mm) laerror("nonempty matrix without reference count encountered");
if(_LA_warn_empty_copyonwrite) std::cout <<"Warning: copyonwrite of empty matrix\n";
return;
}
2010-09-08 18:27:58 +02:00
if(*count > 1){
(*count)--;
count = new int;
*count = 1;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
if(location == cpu){ //matrix is in CPU memory
#endif
#ifdef MATPTR
T **newv = new T*[nn];
2013-11-04 15:56:39 +01:00
newv[0] = new T[(size_t)mm*nn];
if(!detachonly)
{
if(LA_traits<T>::is_plaindata()) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
else
{
for(int i=0; i<nn*mm; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
}
}
2010-09-08 18:27:58 +02:00
v = newv;
for(register int i=1; i<nn; i++) v[i] = v[i-1] + mm;
#else
2013-11-04 15:56:39 +01:00
T *newv = new T[(size_t)mm*nn];
if(!detachonly)
{
if(LA_traits<T>::is_plaindata()) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
else
{
for(int i=0; i<nn*mm; ++i) {newv[i]=v[i]; LA_traits<T>::copyonwrite(newv[i]);}
}
}
2010-09-08 18:27:58 +02:00
v = newv;
#endif
#ifdef CUDALA
}else{ //matrix is in GPU memory
if(!LA_traits<T>::is_plaindata()) laerror("nested types not supported on gpu memory");
2013-11-04 15:56:39 +01:00
T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T));
2010-09-08 18:27:58 +02:00
if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
2013-11-04 15:56:39 +01:00
if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
2010-09-08 18:27:58 +02:00
TEST_CUBLAS("cublasScopy");
v = newv;
2010-06-25 17:28:19 +02:00
}
2004-03-17 17:39:07 +01:00
#endif
}
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* resize given matrix
* @param[in] n number of rows
* @param[in] m number of cols
* @see count, NRMat<T>::copyonwrite(), NRMat<T>::operator|=()
* @return reference to the newly copied matrix
******************************************************************************/
2004-03-17 17:39:07 +01:00
template <typename T>
2010-09-08 18:27:58 +02:00
void NRMat<T>::resize(int n, int m) {
2004-03-17 17:39:07 +01:00
#ifdef DEBUG
2010-09-08 18:27:58 +02:00
if (n<0 || m<0) laerror("illegal dimensions");
2004-03-17 17:39:07 +01:00
#endif
2008-03-14 16:37:20 +01:00
2010-09-08 18:27:58 +02:00
//allow trivial dimensions
if(n == 0 || m == 0) m = n =0;
2008-03-14 16:37:20 +01:00
2010-09-08 18:27:58 +02:00
if(count){
if(n==0 && m==0){
if(--(*count) <= 0){
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
if(location==cpu){
2010-06-25 17:28:19 +02:00
#endif
2010-09-08 18:27:58 +02:00
#ifdef MATPTR
if(v) delete[] (v[0]);
#endif
if(v) delete[] v;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
}
2010-09-08 18:27:58 +02:00
else { gpufree(v); }
#endif
delete count;
}
count = 0;
nn = mm = 0;
v = 0;
return;
}
/*
if we have more than one reference to this matrix, set count to NULL
in order to reach the if-branch below where new memory resources are allocated
*/
if(*count > 1){
(*count)--;
count = 0;
nn = mm = 0;
v = 0;
}
}
if(!count){
count = new int;
*count = 1;
nn = n;
2004-03-17 17:39:07 +01:00
mm = m;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
if(location==cpu){
#endif
#ifdef MATPTR
v = new T*[nn];
2013-11-04 15:56:39 +01:00
v[0] = new T[(size_t)m*n];
2010-09-08 18:27:58 +02:00
for (register int i=1; i< n; i++) v[i] = v[i-1] + m;
#else
2013-11-04 15:56:39 +01:00
v = new T[(size_t)m*n];
2010-09-08 18:27:58 +02:00
#endif
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
}else{
2013-11-04 15:56:39 +01:00
v = (T *) gpualloc((size_t)n*m*sizeof(T));
2010-09-08 18:27:58 +02:00
}
2010-06-25 17:28:19 +02:00
#endif
2004-03-17 17:39:07 +01:00
return;
}
2010-09-08 18:27:58 +02:00
// at this point *count = 1, check if resize is necessary
if (n != nn || m != mm) {
nn = n;
mm = m;
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
if(location==cpu){
#endif
#ifdef MATPTR
delete[] (v[0]);
#endif
delete[] v;
#ifdef MATPTR
v = new T*[nn];
2013-11-04 15:56:39 +01:00
v[0] = new T[(size_t)m*n];
2010-09-08 18:27:58 +02:00
for (int i=1; i< n; i++) v[i] = v[i-1] + m;
#else
2013-11-04 15:56:39 +01:00
v = new T[(size_t)m*n];
2010-09-08 18:27:58 +02:00
#endif
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
}else{
gpufree(v);
2013-11-04 15:56:39 +01:00
v=(T *) gpualloc((size_t)n*m*sizeof(T));
2010-09-08 18:27:58 +02:00
}
2004-03-17 17:39:07 +01:00
#endif
2010-09-08 18:27:58 +02:00
}
2004-03-17 17:39:07 +01:00
}
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* complexify a given matrix \f$A\f$
* @param[in] rhs matrix \f$A\f$ intended for this operation
* @return matrix \f$B\f$ where \f$\Re B=A\f$ and \f$\Im B = 0\f$
******************************************************************************/
2006-04-01 06:48:01 +02:00
template<typename T>
2021-04-21 15:04:37 +02:00
NRMat<std::complex<T> > complexify(const NRMat<T> &rhs) {
2010-09-08 18:27:58 +02:00
NOT_GPU(rhs);
2004-03-17 17:39:07 +01:00
2021-04-21 15:04:37 +02:00
NRMat<std::complex<T> > r(rhs.nrows(), rhs.ncols(), rhs.getlocation());
2010-09-08 18:27:58 +02:00
for(register int i=0; i<rhs.nrows(); ++i){
for(register int j=0; j<rhs.ncols(); ++j) r(i,j) = rhs(i,j);
}
return r;
}
2004-03-17 17:39:07 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* output operator
* @param[in,out] s output stream
* @param[in] x NRMat matrix to be prited out
* @return modified stream
******************************************************************************/
2006-10-21 17:32:53 +02:00
template <typename T>
2010-09-08 18:27:58 +02:00
std::ostream& operator<<(std::ostream &s, const NRMat<T> &x) {
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
if(x.getlocation() == cpu){
#endif
int i(0),j(0);
int n(x.nrows()), m(x.ncols());
s << n << ' ' << m << '\n';
for(i=0; i<n; i++){
for(j=0; j<m; j++){
// endl cannot be used in the conditional expression, since it is an overloaded function
s << (typename LA_traits_io<T>::IOtype) x[i][j] << (j==m-1 ? '\n' : ' ');
}
2010-06-25 17:28:19 +02:00
}
2010-09-08 18:27:58 +02:00
return s;
#ifdef CUDALA
}else{
NRMat<T> tmp = x;
2010-06-25 17:28:19 +02:00
tmp.moveto(cpu);
2010-09-08 18:27:58 +02:00
return s << tmp;
}
2010-06-25 17:28:19 +02:00
#endif
}
2006-10-21 17:32:53 +02:00
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* input operator
* @param[in,out] s input stream
* @param[in] x NRMat matrix for storing the input
* @return modified stream
******************************************************************************/
2006-10-21 17:32:53 +02:00
template <typename T>
2009-11-12 22:01:19 +01:00
std::istream& operator>>(std::istream &s, NRMat<T> &x)
2010-06-25 17:28:19 +02:00
{
#ifdef CUDALA
2010-09-08 18:27:58 +02:00
if(x.getlocation() == cpu){
2010-06-25 17:28:19 +02:00
#endif
2010-09-08 18:27:58 +02:00
int i(0), j(0), n(0), m(0);
s >> n >> m;
x.resize(n, m);
2006-10-21 17:32:53 +02:00
typename LA_traits_io<T>::IOtype tmp;
2010-09-08 18:27:58 +02:00
for(i=0;i<n;i++){
for(j=0; j<m;j++){
s >> tmp;
x[i][j] = tmp;
}
2010-06-25 17:28:19 +02:00
}
2010-09-08 18:27:58 +02:00
return s;
#ifdef CUDALA
}else{
2010-06-25 17:28:19 +02:00
NRMat<T> tmp;
tmp.moveto(cpu);
s >> tmp;
tmp.moveto(x.getlocation());
2010-09-08 18:27:58 +02:00
x = tmp;
2010-06-25 17:28:19 +02:00
return s;
2010-09-08 18:27:58 +02:00
}
2010-06-25 17:28:19 +02:00
#endif
}
2006-10-21 17:32:53 +02:00
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* implements \c NRMat<T> functionality with indexing from 1
* all possible constructors have to be given explicitly, other stuff is inherited
* with exception of the operator() which differs
******************************************************************************/
2006-09-09 18:40:30 +02:00
template<typename T>
class NRMat_from1 : public NRMat<T> {
public:
2010-09-08 18:27:58 +02:00
NRMat_from1(): NRMat<T>() {};
template<int R, int C> explicit NRMat_from1(const T (&a)[R][C]) : NRMat<T>(a) {};
2010-09-08 18:27:58 +02:00
explicit NRMat_from1(const int n): NRMat<T>(n) {};
2019-11-13 23:22:25 +01:00
explicit NRMat_from1(const NRSMat_from1<T> &rhs) : NRMat<T>(rhs) {};
2010-09-08 18:27:58 +02:00
NRMat_from1(const NRMat<T> &rhs): NRMat<T>(rhs) {};//!< be able to convert the parent class transparently to this
NRMat_from1(const int n, const int m): NRMat<T>(n, m) {};
NRMat_from1(const T &a, const int n, const int m): NRMat<T>(a, n, m) {};
NRMat_from1(const T *a, const int n, const int m): NRMat<T>(a, n, m) {};
inline const T& operator() (const int i, const int j) const {
#ifdef DEBUG
if (i<1 || i>NRMat<T>::nn || j<1 || j>NRMat<T>::mm) laerror("index out of range");
if (!NRMat<T>::v) laerror("unallocated matrix");
#endif
NOT_GPU(*this);
#ifdef MATPTR
return NRMat<T>::v[i - 1][j - 1];
#else
2013-11-04 15:56:39 +01:00
return NRMat<T>::v[(i-1)*(size_t)NRMat<T>::mm+j-1];
2010-09-08 18:27:58 +02:00
#endif
}
2006-09-09 18:40:30 +02:00
2010-09-08 18:27:58 +02:00
inline T& operator() (const int i, const int j) {
#ifdef DEBUG
if (_LA_count_check && *NRMat<T>::count != 1) laerror("matrix with *count > 1 used as l-value");
if (i<1 || i>NRMat<T>::nn || j<1 || j>NRMat<T>::mm) laerror("index out of range");
if (!NRMat<T>::v) laerror("unallocated matrix");
#endif
NOT_GPU(*this);
#ifdef MATPTR
return NRMat<T>::v[i-1][j-1];
#else
return NRMat<T>::v[(i-1)*NRMat<T>::mm+j-1];
#endif
}
2006-09-09 18:40:30 +02:00
2010-09-08 18:27:58 +02:00
inline const T get_ij(const int i, const int j) const {
T ret;
#ifdef DEBUG
if (i<1 || i>NRMat<T>::nn || j<1 || j>NRMat<T>::mm) laerror("index out of range");
if (!NRMat<T>::v) laerror("unallocated matrix");
#endif
#ifdef CUDALA
if(NRMat<T>::location == cpu){
#endif
#ifdef MATPTR
return NRMat<T>::v[i - 1][j - 1];
#else
2013-11-04 15:56:39 +01:00
return NRMat<T>::v[(size_t)(i-1)*NRMat<T>::mm + (j-1)];
2010-09-08 18:27:58 +02:00
#endif
#ifdef CUDALA
}else{
2013-11-04 15:56:39 +01:00
const size_t pozice = (size_t)(i-1)*NRMat<T>::mm + (j-1);
2010-09-08 18:27:58 +02:00
gpuget(1, sizeof(T), NRMat<T>::v + pozice, &ret);
return ret;
}
#endif
}
};
2006-09-09 18:40:30 +02:00
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* compute Hadamard (component-wise) product with a given matrix \f$A\f$
* @param[in] rhs matrix \f$A\f$
* @see count, operator*
* @return reference to the multiplied matrix
******************************************************************************/
2010-01-17 21:28:38 +01:00
template<typename T>
2010-09-08 18:27:58 +02:00
NRMat<T>& NRMat<T>::operator^=(const NRMat<T> &rhs){
2010-01-17 21:28:38 +01:00
#ifdef DEBUG
2010-09-08 18:27:58 +02:00
if (nn != rhs.nn || mm != rhs.mm) laerror("incompatible matrices");
2010-01-17 21:28:38 +01:00
#endif
2010-09-08 18:27:58 +02:00
SAME_LOC(*this, rhs);
NOT_GPU(*this);
copyonwrite();// ensure that *count == 1
2010-01-17 21:28:38 +01:00
#ifdef MATPTR
2013-11-04 15:56:39 +01:00
for (register size_t i=0; i< (size_t)nn*mm; i++) v[0][i] *= rhs.v[0][i];
2010-01-17 21:28:38 +01:00
#else
2013-11-04 15:56:39 +01:00
const size_t Dim = (size_t)nn*mm;
for(register size_t i=0; i<Dim; i++) v[i] *= rhs.v[i];
2010-01-17 21:28:38 +01:00
#endif
2010-09-08 18:27:58 +02:00
return *this;
2010-01-17 21:28:38 +01:00
}
2004-03-17 04:07:21 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* performs memory movements in CUDA mode
* @param[in] dest memory destination
* @see count, location
******************************************************************************/
2010-06-25 17:28:19 +02:00
#ifdef CUDALA
template<typename T>
2010-09-08 18:27:58 +02:00
void NRMat<T>::moveto(const GPUID dest) {
if(location == dest) return;// no operation is necessary
/*
currently, only movements between CPU and GPU are implemented
CUBLAS seems to lack support for multiple GPUs
*/
CPU_GPU(location, dest);
location = dest;
if(v && !count) laerror("internal inconsistency of reference counting 1");
if (!count) return;
if(v && *count==0) laerror("internal inconsistency of reference counting 2");
if(!v) return;
T *vold = v;
if(dest == cpu){ //moving from GPU to CPU
2013-11-04 15:56:39 +01:00
v = new T[(size_t)nn*mm];
gpuget((size_t)nn*mm, sizeof(T), vold, v);
2010-09-08 18:27:58 +02:00
if(*count == 1){ gpufree(vold); }
else{ --(*count); count = new int(1); }
}else{ //moving from CPU to GPU
2013-11-04 15:56:39 +01:00
v = (T *) gpualloc((size_t)nn*mm*sizeof(T));
gpuput((size_t)nn*mm, sizeof(T), vold, v);
2010-09-08 18:27:58 +02:00
if(*count == 1) delete[] vold;
else{ --(*count); count = new int(1);}
2010-06-25 17:28:19 +02:00
}
}
#endif
2004-03-17 04:07:21 +01:00
2019-11-13 23:22:25 +01:00
/***************************************************************************//**
* add a given general matrix (type T) \f$A\f$ to the current complex matrix
* @param[in] rhs matrix \f$A\f$ of type T
* @return reference to the modified matrix
******************************************************************************/
template <typename T>
NRMat<T> & NRMat<T>::operator+=(const NRMat<T> &rhs) {
#ifdef DEBUG
if (nn != rhs.nn || mm != rhs.mm) laerror("incompatible matrices");
#endif
SAME_LOC(*this, rhs);
NOT_GPU(*this);
copyonwrite();
#ifdef MATPTR
for(size_t i=0; i< (size_t)nn*mm; i++) v[0][i] += rhs.v[0][i];
#else
for(size_t i=0; i< (size_t)nn*mm; i++) v[i] += rhs.v[i];
#endif
return *this;
}
/***************************************************************************//**
* subtract a given general matrix (type T) \f$A\f$ from the current matrix
* @param[in] rhs matrix \f$A\f$ of type T
* @return reference to the modified matrix
******************************************************************************/
template <typename T>
NRMat<T> & NRMat<T>::operator-=(const NRMat<T> &rhs) {
#ifdef DEBUG
if (nn != rhs.nn || mm != rhs.mm) laerror("incompatible matrices");
#endif
SAME_LOC(*this, rhs);
NOT_GPU(*this);
copyonwrite();
#ifdef MATPTR
for(size_t i=0; i< (size_t)nn*mm; i++) v[0][i] -= rhs.v[0][i];
#else
for(size_t i=0; i<(size_t) nn*mm; i++) v[i] -= rhs.v[i];
#endif
return *this;
}
/*difference of absolute values*/
template <typename T>
NRMat<typename LA_traits<T>::normtype> NRMat<T>::diffabs(const NRMat<T> &rhs) const {
#ifdef DEBUG
if (nn != rhs.nn ||mm!=rhs.mm) laerror("incompatible dimensions");
#endif
NOT_GPU(*this);
NOT_GPU(rhs);
NRMat<typename LA_traits<T>::normtype> r(nn,mm);
for(size_t i=0; i< nn; i++) for(size_t j=0; j< mm; j++) r(i,j) = MYABS((*this)(i,j)) - MYABS(rhs(i,j));
return r;
}
/*elementwise absolute values*/
template <typename T>
NRMat<typename LA_traits<T>::normtype> NRMat<T>::abs() const {
NOT_GPU(*this);
NRMat<typename LA_traits<T>::normtype> r(nn,mm);
for(size_t i=0; i< nn; i++) for(size_t j=0; j< mm; j++) r(i,j) = MYABS((*this)(i,j));
return r;
}
2019-11-13 23:22:25 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* generate operators: Mat + a, a + Mat, Mat * a
* corresponding macro is defined in vec.h
******************************************************************************/
NRVECMAT_OPER(Mat, +)
NRVECMAT_OPER(Mat, -)
NRVECMAT_OPER(Mat, *)
2004-03-17 04:07:21 +01:00
2010-09-08 18:27:58 +02:00
/***************************************************************************//**
* generate Mat + Mat, Mat - Mat
* corresponding macro is defined in vec.h
******************************************************************************/
NRVECMAT_OPER2(Mat, +)
NRVECMAT_OPER2(Mat, -)
2004-03-17 04:07:21 +01:00
2010-09-08 18:27:58 +02:00
}//end of the LA-namespace
#endif/* _LA_MAT_H_ */
2013-11-04 15:56:39 +01:00