*** empty log message ***

This commit is contained in:
jiri
2013-11-04 14:56:39 +00:00
parent a9e30620f0
commit 80fe44fab2
18 changed files with 505 additions and 308 deletions

109
mat.h
View File

@@ -39,10 +39,10 @@ protected:
T *v;//!< pointer to the data stored contiguously in memory
#endif
int *count;//!< reference counter
public:
#ifdef CUDALA
GPUID location;
#endif
public:
friend class NRVec<T>;
friend class NRSMat<T>;
@@ -89,16 +89,16 @@ public:
//! explicit constructor converting vector into a <code>NRMat<T></code> object
#ifdef MATPTR
explicit NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset = 0):NRMat(&rhs[0][0] + offset , n, m){
if (offset < 0 || n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
};
#else
explicit NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset = 0);
#endif
#ifdef MATPTR
const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v[0],rhs.v[0],nn*mm);} //memcmp for scalars else elementwise
const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v[0],rhs.v[0],(size_t)nn*mm);} //memcmp for scalars else elementwise
#else
const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v,rhs.v,nn*mm);} //memcmp for scalars else elementwise
const bool operator!=(const NRMat &rhs) const {if(nn!=rhs.nn || mm!=rhs.mm) return 1; return LA_traits<T>::gencmp(v,rhs.v,(size_t)nn*mm);} //memcmp for scalars else elementwise
#endif
const bool operator==(const NRMat &rhs) const {return !(*this != rhs);};
@@ -107,7 +107,7 @@ public:
inline int getcount() const {return count?*count:0;}
//! ensure that the data of this matrix are referenced exactly once
void copyonwrite();
void copyonwrite(bool detachonly=false);
/***************************************************************************//**
* routines for CUDA related stuff
@@ -260,7 +260,7 @@ public:
//! get the number of columns
inline int ncols() const;
//! get the number of matrix elements
inline int size() const;
inline size_t size() const;
//! unformatted input
void get(int fd, bool dimensions = 1, bool transposed = false);
@@ -274,8 +274,8 @@ public:
//! set all matrix elements equal to zero
void clear(){
if(nn&&mm){
copyonwrite();
LA_traits<T>::clear((*this)[0], nn*mm);
copyonwrite(true);
LA_traits<T>::clear((*this)[0], (size_t)nn*mm);
}
};
@@ -379,7 +379,7 @@ template <typename T>
NRMat<T>::NRMat(const int n, const int m, const GPUID loc) : nn(n), mm(m), count(new int) {
T* p;
*count = 1;
const int nm = n*m;
const size_t nm = (size_t)n*m;
#ifdef CUDALA
location = (loc==undefined?DEFAULT_LOC:loc);
if(location == cpu) {
@@ -408,7 +408,7 @@ NRMat<T>::NRMat(const int n, const int m, const GPUID loc) : nn(n), mm(m), count
******************************************************************************/
template <typename T>
NRMat<T>::NRMat(const T &a, const int n, const int m, const GPUID loc) : nn(n), mm(m), count(new int) {
const int nm = n*m;
const size_t nm = (size_t)n*m;
T *p;
*count = 1;
@@ -447,7 +447,7 @@ NRMat<T>::NRMat(const T &a, const int n, const int m, const GPUID loc) : nn(n),
******************************************************************************/
template <typename T>
NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new int) {
const int nm = n*m;
const size_t nm = (size_t)n*m;
T *p;
*count = 1;
@@ -460,7 +460,7 @@ NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new
p = v[0] = new T[nm];
for (register int i=1; i<n; i++) v[i] = v[i-1] + m;
#else
p = v = new T[m*n];
p = v = new T[nm];
#endif
if (a != (T)0)
for (register int i=0; i<nm; i++) *p++ = a;
@@ -483,7 +483,7 @@ NRMat<T>::NRMat(const T &a, const int n, const int m) : nn(n), mm(m), count(new
******************************************************************************/
template <typename T>
NRMat<T>::NRMat(const T *a, const int n, const int m) : nn(n), mm(m), count(new int) {
const int nm = n*m;
const size_t nm = (size_t)n*m;
#ifdef CUDALA
location = DEFAULT_LOC;
#endif
@@ -546,10 +546,10 @@ NRMat<T>::NRMat(const NRSMat<T> &rhs) {
*count = 1;
#ifdef MATPTR
v = new T*[nn];
v[0] = new T[mm*nn];
v[0] = new T[(size_t)mm*nn];
for (int i=1; i<nn; i++) v[i] = v[i-1] + mm;
#else
v = new T[mm*nn];
v = new T[(size_t)mm*nn];
#endif
#ifdef MATPTR
@@ -561,7 +561,7 @@ NRMat<T>::NRMat(const NRSMat<T> &rhs) {
#else
for (i=0; i<nn; i++){
for (j=0; j<=i; j++){
v[i*nn + j] = v[j*nn + i] = rhs[k++];
v[i*(size_t)nn + j] = v[j*(size_t)nn + i] = rhs[k++];
}
}
#endif
@@ -578,7 +578,7 @@ NRMat<T>::NRMat(const NRSMat<T> &rhs) {
template <typename T>
NRMat<T>::NRMat(const NRVec<T> &rhs, const int n, const int m, const int offset)
{
if (offset < 0 || n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
if (offset < 0 || (size_t)n*m + offset > rhs.nn) laerror("matrix dimensions and offset incompatible with vector length");
#ifdef CUDALA
location=rhs.location;
@@ -628,7 +628,7 @@ inline T* NRMat<T>::operator[](const int i) {
#ifdef MATPTR
return v[i];
#else
return v + i*mm;
return v + i*(size_t)mm;
#endif
}
@@ -646,7 +646,7 @@ inline const T* NRMat<T>::operator[](const int i) const {
#ifdef MATPTR
return v[i];
#else
return v + i*mm;
return v + i*(size_t)mm;
#endif
}
@@ -668,7 +668,7 @@ inline T& NRMat<T>::operator()(const int i, const int j){
#ifdef MATPTR
return v[i][j];
#else
return v[i*mm + j];
return v[i*(size_t)mm + j];
#endif
}
@@ -689,7 +689,7 @@ inline const T& NRMat<T>::operator()(const int i, const int j) const{
#ifdef MATPTR
return v[i][j];
#else
return v[i*mm + j];
return v[i*(size_t)mm + j];
#endif
}
@@ -712,11 +712,11 @@ inline const T NRMat<T>::get_ij(const int i, const int j) const{
#ifdef MATPTR
return v[i][j];
#else
return v[i*mm + j];
return v[i*(size_t)mm + j];
#endif
#ifdef CUDALA
}else{
const int pozice = i*mm + j;
const size_t pozice = i*(size_t)mm + j;
gpuget(1, sizeof(T), v + pozice, &ret);
return ret;
}
@@ -743,8 +743,8 @@ inline int NRMat<T>::ncols() const{
* @return number of elements
******************************************************************************/
template <typename T>
inline int NRMat<T>::size() const{
return nn*mm;
inline size_t NRMat<T>::size() const{
return (size_t)nn*mm;
}
/***************************************************************************//**
@@ -795,7 +795,7 @@ inline const double NRMat<double>::amax() const{
#ifdef CUDALA
}else{
double ret(0.0);
const int pozice = cublasIdamax(nn*mm, v, 1) - 1;
const size_t pozice = cublasIdamax((size_t)nn*mm, v, 1) - 1;
TEST_CUBLAS("cublasIdamax");
gpuget(1, sizeof(double), v + pozice, &ret);
return ret;
@@ -815,7 +815,7 @@ inline const double NRMat<double>::amin() const{
if(location == cpu){
#endif
// idamin seems not to be supported
const int nm = nn*mm;
const size_t nm = (size_t)nn*mm;
double val(0.0);
int index(-1);
ret = std::numeric_limits<double>::max();
@@ -834,7 +834,7 @@ inline const double NRMat<double>::amin() const{
#endif
#ifdef CUDALA
}else{
const int pozice = cublasIdamin(nn*mm, v, 1) - 1;
const size_t pozice = cublasIdamin((size_t)nn*mm, v, 1) - 1;
TEST_CUBLAS("cublasIdamin");
gpuget(1, sizeof(double), v + pozice, &ret);
}
@@ -860,7 +860,7 @@ inline const complex<double> NRMat<complex<double> >::amax() const{
#ifdef CUDALA
}else{
complex<double> ret(0.0, 0.0);
const int pozice = cublasIzamax(nn*mm, (cuDoubleComplex*)v, 1) - 1;
const size_t pozice = cublasIzamax((size_t)nn*mm, (cuDoubleComplex*)v, 1) - 1;
TEST_CUBLAS("cublasIzamax");
gpuget(1, sizeof(complex<double>), v + pozice, &ret);
return ret;
@@ -881,7 +881,7 @@ inline const complex<double> NRMat<complex<double> >::amin() const{
if(location == cpu){
#endif
// idamin seems not to be supported
const int nm = nn*mm;
const size_t nm = (size_t)nn*mm;
int index(-1);
double val(0.0), min_val(0.0);
complex<double> z_val(0.0, 0.0);
@@ -903,7 +903,7 @@ inline const complex<double> NRMat<complex<double> >::amin() const{
#endif
#ifdef CUDALA
}else{
const int pozice = cublasIzamin(nn*mm, (cuDoubleComplex*)v, 1) - 1;
const size_t pozice = cublasIzamin((size_t)nn*mm, (cuDoubleComplex*)v, 1) - 1;
TEST_CUBLAS("cublasIzamin");
gpuget(1, sizeof(complex<double>), v + pozice, &ret);
}
@@ -991,7 +991,7 @@ NRMat<T> & NRMat<T>::operator|=(const NRMat<T> &rhs) {
* @see NRMat<T>::count, NRMat<T>::operator|=()
******************************************************************************/
template <typename T>
void NRMat<T>::copyonwrite() {
void NRMat<T>::copyonwrite(bool detachonly) {
if(!count) laerror("attempt to call copyonwrite() for a matrix with count == 0");
if(*count > 1){
(*count)--;
@@ -1002,20 +1002,20 @@ void NRMat<T>::copyonwrite() {
#endif
#ifdef MATPTR
T **newv = new T*[nn];
newv[0] = new T[mm*nn];
memcpy(newv[0], v[0], mm*nn*sizeof(T));
newv[0] = new T[(size_t)mm*nn];
if(!detachonly) memcpy(newv[0], v[0], (size_t)mm*nn*sizeof(T));
v = newv;
for(register int i=1; i<nn; i++) v[i] = v[i-1] + mm;
#else
T *newv = new T[mm*nn];
memcpy(newv, v, mm*nn*sizeof(T));
T *newv = new T[(size_t)mm*nn];
if(!detachonly) memcpy(newv, v, (size_t)mm*nn*sizeof(T));
v = newv;
#endif
#ifdef CUDALA
}else{ //matrix is in GPU memory
T *newv = (T *) gpualloc(mm*nn*sizeof(T));
T *newv = (T *) gpualloc((size_t)mm*nn*sizeof(T));
if(sizeof(T)%sizeof(float) != 0) laerror("cpu memcpy alignment problem");
cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
if(!detachonly) cublasScopy(nn*mm*sizeof(T)/sizeof(float), (const float *) v, 1, (float *)newv, 1);
TEST_CUBLAS("cublasScopy");
v = newv;
}
@@ -1082,14 +1082,14 @@ void NRMat<T>::resize(int n, int m) {
#endif
#ifdef MATPTR
v = new T*[nn];
v[0] = new T[m*n];
v[0] = new T[(size_t)m*n];
for (register int i=1; i< n; i++) v[i] = v[i-1] + m;
#else
v = new T[m*n];
v = new T[(size_t)m*n];
#endif
#ifdef CUDALA
}else{
v = (T *) gpualloc(n*m*sizeof(T));
v = (T *) gpualloc((size_t)n*m*sizeof(T));
}
#endif
return;
@@ -1108,15 +1108,15 @@ void NRMat<T>::resize(int n, int m) {
delete[] v;
#ifdef MATPTR
v = new T*[nn];
v[0] = new T[m*n];
v[0] = new T[(size_t)m*n];
for (int i=1; i< n; i++) v[i] = v[i-1] + m;
#else
v = new T[m*n];
v = new T[(size_t)m*n];
#endif
#ifdef CUDALA
}else{
gpufree(v);
v=(T *) gpualloc(n*m*sizeof(T));
v=(T *) gpualloc((size_t)n*m*sizeof(T));
}
#endif
}
@@ -1228,7 +1228,7 @@ public:
#ifdef MATPTR
return NRMat<T>::v[i - 1][j - 1];
#else
return NRMat<T>::v[(i-1)*NRMat<T>::mm+j-1];
return NRMat<T>::v[(i-1)*(size_t)NRMat<T>::mm+j-1];
#endif
}
@@ -1258,11 +1258,11 @@ public:
#ifdef MATPTR
return NRMat<T>::v[i - 1][j - 1];
#else
return NRMat<T>::v[(i-1)*NRMat<T>::mm + (j-1)];
return NRMat<T>::v[(size_t)(i-1)*NRMat<T>::mm + (j-1)];
#endif
#ifdef CUDALA
}else{
const int pozice = (i-1)*NRMat<T>::mm + (j-1);
const size_t pozice = (size_t)(i-1)*NRMat<T>::mm + (j-1);
gpuget(1, sizeof(T), NRMat<T>::v + pozice, &ret);
return ret;
}
@@ -1286,10 +1286,10 @@ NRMat<T>& NRMat<T>::operator^=(const NRMat<T> &rhs){
copyonwrite();// ensure that *count == 1
#ifdef MATPTR
for (register int i=0; i< nn*mm; i++) v[0][i] *= rhs.v[0][i];
for (register size_t i=0; i< (size_t)nn*mm; i++) v[0][i] *= rhs.v[0][i];
#else
const int Dim = nn*mm;
for(register int i=0; i<Dim; i++) v[i] *= rhs.v[i];
const size_t Dim = (size_t)nn*mm;
for(register size_t i=0; i<Dim; i++) v[i] *= rhs.v[i];
#endif
return *this;
}
@@ -1320,14 +1320,14 @@ void NRMat<T>::moveto(const GPUID dest) {
T *vold = v;
if(dest == cpu){ //moving from GPU to CPU
v = new T[nn*mm];
gpuget(nn*mm, sizeof(T), vold, v);
v = new T[(size_t)nn*mm];
gpuget((size_t)nn*mm, sizeof(T), vold, v);
if(*count == 1){ gpufree(vold); }
else{ --(*count); count = new int(1); }
}else{ //moving from CPU to GPU
v = (T *) gpualloc(nn*mm*sizeof(T));
gpuput(nn*mm, sizeof(T), vold, v);
v = (T *) gpualloc((size_t)nn*mm*sizeof(T));
gpuput((size_t)nn*mm, sizeof(T), vold, v);
if(*count == 1) delete[] vold;
else{ --(*count); count = new int(1);}
}
@@ -1351,3 +1351,4 @@ NRVECMAT_OPER2(Mat, -)
}//end of the LA-namespace
#endif/* _LA_MAT_H_ */