*** empty log message ***

2010-06-25 15:28:19 +00:00
parent eb0aaf9adf
commit 074c943862
13 changed files with 1938 additions and 464 deletions
--- a/mat.cc
+++ b/mat.cc
@@ -122,6 +122,15 @@ return r;
 template <typename T>
 void NRMat<T>::put(int fd, bool dim, bool transp) const
 {
+#ifdef CUDALA
+if(location!=cpu)
+	{
+	NRMat<T> tmp= *this;
+	tmp.moveto(cpu);
+	tmp.put(fd,dim,transp);
+	return;
+	}
+#endif
 errno=0;
 if(dim)
 {
@@ -153,6 +162,17 @@ else LA_traits<T>::multiput(nn*mm,fd,
 template <typename T>
 void NRMat<T>::get(int fd, bool dim, bool transp)
 {
+#ifdef CUDALA
+if(location!=cpu)
+        {
+        NRMat<T> tmp;
+	tmp.moveto(cpu);
+        tmp.get(fd,dim,transp);
+        tmp.moveto(location);
+	*this = tmp;
+        return;
+        }
+#endif
 int nn0,mm0;
 errno=0;
 if(dim)
@@ -188,6 +208,43 @@ else LA_traits<T>::multiget(nn*mm,fd,


 // Assign diagonal
+// Specialization for double: turn a square matrix into a*I, i.e. clear all
+// nn*nn elements and then store `a` in every diagonal position.
+// Returns *this.
+template <>
+NRMat<double> & NRMat<double>::operator=(const double &a)
+{
+	copyonwrite();
+#ifdef DEBUG
+	if (nn != mm) laerror("RMat.operator=scalar on non-square matrix");
+#endif
+#ifdef CUDALA
+	if(location!=cpu)
+	{
+		// NOTE(review): gpuputdouble presumably places the scalar in
+		// device memory so CUBLAS can read it - confirm against its definition
+		double *dev_scalar=gpuputdouble(0.);
+		cublasDcopy(nn*nn, dev_scalar, 0, v, 1);	// source stride 0: broadcast
+		gpufree(dev_scalar);
+		dev_scalar=gpuputdouble(a);
+		cublasDcopy(nn, dev_scalar, 0, v, nn+1);	// stride nn+1 walks the diagonal
+		gpufree(dev_scalar);
+		return *this;
+	}
+#endif
+#ifdef MATPTR
+	memset(v[0],0,nn*nn*sizeof(double));
+	for (int k=0; k<nn; ++k) v[k][k] = a;
+#else
+	const double zero=0.;
+	cblas_dcopy(nn*nn, &zero, 0, v, 1);	// source stride 0: fill with zeros
+	cblas_dcopy(nn, &a, 0, v, nn+1);	// stride nn+1 walks the diagonal
+#endif
+	return *this;
+}
+
+
+
+
 template <typename T>
 NRMat<T> & NRMat<T>::operator=(const T &a)
 {
@@ -206,6 +263,64 @@ NRMat<T> & NRMat<T>::operator=(const T &a)
 }


+// Specialization for double: add the scalar `a` to each diagonal element of
+// a square matrix, leaving off-diagonal elements unchanged. Returns *this.
+template <>
+NRMat<double> & NRMat<double>::operator+=(const double&a)
+{
+	copyonwrite();
+#ifdef DEBUG
+	if (nn != mm) laerror("Mat.operator+=scalar on non-square matrix");
+#endif
+#ifdef CUDALA
+	if(location!=cpu)
+	{
+		double *dev_scalar=gpuputdouble(a);
+		// x stride 0 reuses the single scalar, y stride nn+1 hits the diagonal
+		cublasDaxpy(nn, 1.0, dev_scalar, 0, *this, nn+1);
+		gpufree(dev_scalar);
+		return *this;
+	}
+#endif
+#ifdef MATPTR
+	for (int k=0; k<nn; ++k) v[k][k] += a;
+#else
+	// x stride 0 reuses the single scalar, y stride nn+1 hits the diagonal
+	cblas_daxpy(nn, 1.0, &a, 0, *this, nn+1);
+#endif
+	return *this;
+}
+
+
+// Specialization for double: subtract the scalar `a` from each diagonal
+// element of a square matrix, leaving off-diagonal elements unchanged.
+// Returns *this.
+template <>
+NRMat<double> & NRMat<double>::operator-=(const double&a)
+{
+        copyonwrite();
+#ifdef DEBUG
+        // the message names this operator (it used to say "operator+=")
+        if (nn != mm) laerror("Mat.operator-=scalar on non-square matrix");
+#endif
+#ifdef CUDALA
+	if(location==cpu)
+	{
+#endif
+#ifdef MATPTR
+        for (int i=0; i< nn; i++) v[i][i] -= a;
+#else
+	// x stride 0 reuses the single scalar, y stride nn+1 hits the diagonal
+	cblas_daxpy(nn, -1.0, &a, 0, *this, nn+1);
+#endif
+#ifdef CUDALA
+	}
+	else
+	{
+        double *d=gpuputdouble(a);
+        cublasDaxpy(nn, -1.0, d, 0, *this, nn+1);
+        gpufree(d);
+	}
+#endif
+        return *this;
+}
+
+


 // M += a
@@ -240,6 +355,31 @@ NRMat<T> & NRMat<T>::operator-=(const T &a)
 	return *this;
 }

+// Specialization for double: unary minus. Returns a new matrix of the same
+// dimensions holding the negated elements; the operand is left unmodified.
+template <>
+const NRMat<double> NRMat<double>::operator-() const
+{
+        // allocate the result at the operand's location so that the
+        // BLAS/CUBLAS calls below read and write memory of the same kind
+        NRMat<double> result(nn, mm, getlocation());
+#ifdef CUDALA
+        if(location==cpu)
+	{
+#endif
+#ifdef MATPTR
+        for (int i=0; i<nn*mm; i++) result.v[0][i]= -v[0][i];
+#else
+	// copy first and negate the copy: scaling v directly would change
+	// *this inside a const method and leave result uninitialized
+	cblas_dcopy(nn*mm, v, 1, result.v, 1);
+	cblas_dscal(nn*mm, -1., result.v, 1);
+#endif
+#ifdef CUDALA
+        }
+	else
+	{
+	// same copy-then-negate sequence for the non-cpu location
+	cublasDcopy(nn*mm, v, 1, result.v, 1);
+	cublasDscal(nn*mm, -1., result.v, 1);
+	}
+#endif
+        return result;
+}
+
+
+
 // unary minus
 template <typename T>
 const NRMat<T> NRMat<T>::operator-() const
@@ -253,6 +393,7 @@ const NRMat<T> NRMat<T>::operator-() const
 	return result;
 }

+
 // direct sum
 template <typename T>
 const NRMat<T> NRMat<T>::operator&(const NRMat<T> & b) const
@@ -540,7 +681,13 @@ template<>
 NRMat<double> & NRMat<double>::operator*=(const double &a)
 {
 	copyonwrite();
-	cblas_dscal(nn*mm, a, *this, 1);
+#ifdef CUDALA
+	if(location==cpu)
+#endif
+		cblas_dscal(nn*mm, a, *this, 1);
+#ifdef CUDALA
+	else 	cublasDscal(nn*mm, a, v, 1);
+#endif
 	return *this;
 }

@@ -559,6 +706,7 @@ NRMat< complex<double> >::operator*=(const complex<double> &a)
 template <typename T>
 NRMat<T> & NRMat<T>::operator*=(const T &a)
 {
+NOT_GPU(*this);
        copyonwrite();
 #ifdef MATPTR
         for (int i=0; i< nn*mm; i++) v[0][i] *= a;
@@ -578,8 +726,16 @@ NRMat<double> & NRMat<double>::operator+=(const NRMat<double>  &rhs)
 	if (nn != rhs.nn || mm!= rhs.mm) 
 		laerror("Mat += Mat of incompatible matrices");
 #endif
+SAME_LOC(*this,rhs);
 	copyonwrite();
+#ifdef CUDALA
+	if(location==cpu)
+#endif
 	cblas_daxpy(nn*mm, 1.0, rhs, 1, *this, 1);
+#ifdef CUDALA
+	else 
+	cublasDaxpy(nn*mm, 1.0, rhs, 1, v, 1);
+#endif
 	return *this;
 }

@@ -625,8 +781,16 @@ NRMat<double> & NRMat<double>::operator-=(const NRMat<double>  &rhs)
 	if (nn != rhs.nn || mm!= rhs.mm) 
 		laerror("Mat -= Mat of incompatible matrices");
 #endif
+	SAME_LOC(*this,rhs);
 	copyonwrite();
+#ifdef CUDALA
+	if(location==cpu)
+#endif
 	cblas_daxpy(nn*mm, -1.0, rhs, 1, *this, 1);
+#ifdef CUDALA
+	else
+	cublasDaxpy(nn*mm, -1.0, rhs, 1, v, 1);
+#endif
 	return *this;
 }

@@ -836,9 +1000,18 @@ const NRMat<double> NRMat<double>::operator*(const NRMat<double> &rhs) const
 	if (mm != rhs.nn) laerror("product of incompatible matrices");
 	if (rhs.mm <=0) laerror("illegal matrix dimension in gemm");
 #endif
-	NRMat<double> result(nn, rhs.mm);
+SAME_LOC(*this,rhs);
+
+	NRMat<double> result(nn, rhs.mm,rhs.getlocation());
+#ifdef CUDALA
+        if(location==cpu)
+#endif
 	cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, nn, rhs.mm, mm, 1.0,
 			*this, mm, rhs, rhs.mm, 0.0, result, rhs.mm);
+#ifdef CUDALA
+        else
+	cublasDgemm('N','N',rhs.mm,nn,mm,1.0,rhs, rhs.mm,*this, mm, 0.0, result, rhs.mm);
+#endif
 	return result;
 }

@@ -991,12 +1164,21 @@ void NRMat<double>::gemm(const double &beta, const NRMat<double> &a,
 	if (l!=nn || ll!=mm || k!=kk) laerror("incompatible matrices in Mat:gemm()");
 	if(b.mm <=0 || mm<=0) laerror("illegal matrix dimension in gemm");
 #endif
+SAME_LOC3(*this,a,b);
+
 	if (alpha==0.0 && beta==1.0) return;

 	copyonwrite();
+#ifdef CUDALA
+	if(location==cpu)
+#endif
 	cblas_dgemm(CblasRowMajor, (transa=='n' ? CblasNoTrans : CblasTrans),
 			(transb=='n' ? CblasNoTrans : CblasTrans), nn, mm, k, alpha, a,
 			a.mm, b , b.mm, beta, *this , mm);
+#ifdef CUDALA
+        else
+	cublasDgemm(transb,transa,mm,nn,k,alpha, b , b.mm, a,a.mm, beta, *this , mm);
+#endif
 }


@@ -1028,7 +1210,20 @@ void NRMat< complex<double> >::gemm(const complex<double> & beta,
 template<>
 const double  NRMat<double>::norm(const double scalar) const
 {
-	if (!scalar) return cblas_dnrm2(nn*mm, (*this)[0], 1);
+	if (!scalar)  
+		{
+#ifdef CUDALA
+		if(location==cpu)
+#endif
+		return cblas_dnrm2(nn*mm, (*this)[0], 1);
+#ifdef CUDALA
+		else
+		return cublasDnrm2(nn*mm, v, 1);
+#endif
+		}
+
+NOT_GPU(*this);
+
 	double sum = 0;
 	for (int i=0; i<nn; i++)
 		for (int j=0; j<mm; j++) {
@@ -1246,6 +1441,127 @@ else //vectors are columns



+//------------------------------------------------------------------------------
+//	for a matrix A(1:nn,1:mm) performs Fortran-like 
+//	operation A(nn:-1:1,:) 
+//------------------------------------------------------------------------------
+// Specialization for double: reverse the order of the rows in place by
+// exchanging row i with row nn-1-i. Returns *this.
+template<>
+NRMat<double>& NRMat<double>::SwapRows(){
+        // cblas_dswap works on host memory only, so reject other locations
+        NOT_GPU(*this);
+        copyonwrite();
+        const int n_pul = this->nn / 2;	// the middle row of an odd count stays put
+        double * const dataIn  = this->v;
+
+        for(int i=0; i<n_pul; i++){
+                cblas_dswap(mm, dataIn + i*mm, 1, dataIn + (nn-i-1)*mm, 1);
+        }
+
+        return *this;
+}
+//------------------------------------------------------------------------------
+// Specialization for complex<double>: reverse the order of the rows in place
+// by exchanging row i with row n-1-i. Returns *this.
+template<>
+NRMat<complex<double> >& NRMat<complex<double> >::SwapRows(){
+        // cblas_zswap works on host memory only, so reject other locations
+        NOT_GPU(*this);
+        copyonwrite();
+	const int n = this->nn;
+	const int m = this->mm;
+        const int n_pul = n / 2;	// the middle row of an odd count stays put
+        complex<double> * const dataIn  = this->v;
+
+        for(int i=0; i<n_pul; i++){
+                cblas_zswap(m, dataIn + i*m, 1, dataIn + (n-i-1)*m, 1);
+        }
+
+        return *this;
+}
+//------------------------------------------------------------------------------
+// Generic version: reverse the order of the rows in place by exchanging
+// row i with row n-1-i element by element. Returns *this.
+template<typename T>
+NRMat<T>& NRMat<T>::SwapRows(){
+        // the loop below dereferences v on the host, so reject other locations
+        NOT_GPU(*this);
+        copyonwrite();
+	const int n = this->nn;
+	const int m = this->mm;
+        const int n_pul = n / 2;	// the middle row of an odd count stays put
+        T * const dataIn  = this->v;
+
+        for(int i=0; i<n_pul; i++){
+		T * const upper = dataIn + i*m;
+		T * const lower = dataIn + (n-i-1)*m;
+
+		for(int j=0;j<m;j++){
+			// a real exchange: a plain assignment would overwrite
+			// the upper row and lose its contents
+			const T tmp = upper[j];
+			upper[j] = lower[j];
+			lower[j] = tmp;
+		}
+        }
+        return *this;
+}
+//------------------------------------------------------------------------------
+//	for a matrix A(1:nn,1:mm) performs Fortran-like 
+//	operation A(:,mm:-1:1) 
+//------------------------------------------------------------------------------
+// Specialization for double: reverse the order of the columns in place by
+// exchanging column i with column m-1-i. Returns *this.
+template<>
+NRMat<double>& NRMat<double>::SwapCols(){
+        // cblas_dswap works on host memory only, so reject other locations
+        NOT_GPU(*this);
+        copyonwrite();
+	const int n = this->nn;
+	const int m = this->mm;
+        const int m_pul = m / 2;	// the middle column of an odd count stays put
+        double * const dataIn  = this->v;
+
+        for(int i=0; i<m_pul; i++){
+                // stride m steps down a column of the row-major storage
+                cblas_dswap(n, dataIn + i, m, dataIn + (m-i-1), m);
+        }
+
+        return *this;
+}
+//------------------------------------------------------------------------------
+// Specialization for complex<double>: reverse the order of the columns in
+// place by exchanging column i with column mm-1-i. Returns *this.
+template<>
+NRMat<complex<double> >& NRMat<complex<double> >::SwapCols(){
+        // cblas_zswap works on host memory only, so reject other locations
+        NOT_GPU(*this);
+        copyonwrite();
+        const int m_pul = this->mm / 2;	// the middle column of an odd count stays put
+        complex<double> * const dataIn  = this->v;
+
+        for(int i=0; i<m_pul; i++){
+                // stride mm steps down a column of the row-major storage
+                cblas_zswap(nn, dataIn + i, mm, dataIn + (mm-i-1), mm);
+        }
+        return *this;
+}
+//------------------------------------------------------------------------------
+// Generic version: reverse the order of the columns in place by exchanging
+// column i with column mm-1-i element by element. Returns *this.
+template<typename T>
+NRMat<T>& NRMat<T>::SwapCols(){
+        // the loop below dereferences v on the host, so reject other locations
+        NOT_GPU(*this);
+        copyonwrite();
+        const int m_pul = mm / 2;	// the middle column of an odd count stays put
+        T * const dataIn  = this->v;
+
+        for(int i=0; i<m_pul; i++){
+		for(int j=0;j<nn;j++){
+			const int jm = j*mm;
+			// a real exchange: a plain assignment would overwrite
+			// the left column and lose its contents
+			const T tmp = dataIn[i + jm];
+			dataIn[i + jm] = dataIn[(mm-i-1) + jm];
+			dataIn[(mm-i-1) + jm] = tmp;
+		}
+        }
+        return *this;
+}
+//------------------------------------------------------------------------------
+//	for a matrix A(1:nn,1:mm) performs Fortran-like 
+//	operation A(nn:-1:1,mm:-1:1)
+//------------------------------------------------------------------------------
+// Reverse both the row order and the column order in place. Returns *this.
+// Element (i,j) moves to (n-1-i, m-1-j); with row-major storage that is flat
+// index n*m-1-(i*m+j), so the operation is a reversal of the flat buffer.
+template<typename T>
+NRMat<T>& NRMat<T>::SwapRowsCols(){
+        // the loop below dereferences v on the host, so reject other locations
+        NOT_GPU(*this);
+        this->copyonwrite();
+        T * const data = this->v;
+        const int Dim = this->nn * this->mm;
+        const int half = Dim / 2;	// the middle element of an odd count stays put
+
+        for(int k=0;k<half;k++){
+                // exchange the pair through a temporary; writing into the
+                // same buffer without one overwrites values not yet read
+                const T tmp = data[k];
+                data[k] = data[Dim - k - 1];
+                data[Dim - k - 1] = tmp;
+        }
+
+        return *this;
+}
+//------------------------------------------------------------------------------


 //////////////////////////////////////////////////////////////////////////////