*** empty log message ***

2006-09-11 23:07:22 +00:00
parent e49b73e9e9
commit d55727cb83
5 changed files with 71 additions and 24 deletions
--- a/matexp.h
+++ b/matexp.h
@@ -248,28 +248,43 @@ return r;
 //this simple implementation seems not to be numerically stable enough
 //and probably not efficient either
 //@@@ make more efficient - for nilpotent mat at known power and 
 template<class M, class V>
-const V exptimesnaive(const M &mat, V vec) //uses just matrix vector multiplication
+void exptimesdestructive(const M &mat, V &result, V &rhs,  bool transpose=false, const double scale=1. ) //uses just matrix vector multiplication
 {
-if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)vec.size()) laerror("inappropriate sizes in exptimes");
+if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)rhs.size()) laerror("inappropriate sizes in exptimes");
 int power;
 //prepare the polynom of and effectively scale the matrix
 NRVec<typename LA_traits<V>::elementtype> taylor2=exp_aux<M,typename LA_traits<V>::elementtype>(mat,power);
 cerr <<"test power "<<power<<endl;
 V tmp;
 V result(mat.nrows());
 for(int i=1; i<=(1<<power); ++i) //unfortunatelly, here we have to repeat it many times, unlike if the matrix is stored explicitly
 	{
-	if(i>1) vec=result; //apply again to the result of previous application
+	if(i>1) rhs=result; //apply again to the result of previous application
-	//apply polynom of the matrix to the vector iteratively
+	else result=rhs;
-	V y=vec;
+	tmp=rhs; //now rhs can be used as scratch	
-	result=y*taylor2[0];
+	result*=taylor2[0];
 	for(int j=1; j<taylor2.size(); ++j)
 		{
-		y=mat*y;
+		mat.gemv(0.,rhs,transpose?'t':'n',scale,tmp);
-		result.axpy(taylor2[j],y);
+		tmp=rhs;
 		result.axpy(taylor2[j],tmp);
 		}
 	}
 return;
 }
 template<class M, class V>
 const V exptimes(const M &mat, V rhs, bool transpose=false, const double scale=1.)
 {
 V result;
 exptimesdestructive(mat,result,rhs,transpose,scale);
 return result;
 }
@@ -279,8 +294,7 @@ return result;
-
+#ifdef EXPOKIT
 //this comes from dgexpv from expokit, it had to be translated to C to make a template
 // dgexpv.f -- translated by f2c (version 20030320).
@@ -323,13 +337,13 @@ extern "C" void FORNAME(dnchbv)(int *, double *, double *, int *, double *, doub
 //partial template specialization leaving still free room for generic matrix type
 template<class M>
-extern void exptimes(const M &mat, NRVec<double> &result, const NRVec<double> &rhs, bool transpose=false, const double t=1., int mxkrylov=0, double tol=1e-14, int mxstep=1000, int mxreject=0, int ideg=6)
+extern void dgexpv(const M &mat, NRVec<double> &result, const NRVec<double> &rhs, bool transpose=false, const double t=1., int mxkrylov=0, double tol=1e-14, int mxstep=1000, int mxreject=0, int ideg=6)
 {
 if(mat.nrows()!= mat.ncols()) laerror("non-square matrix in exptimes");
 int n=mat.nrows();
 int m;
-if(mxkrylov)  m=mxkrylov; else m= n>100? 100: n-1;
+if(mxkrylov)  m=mxkrylov; else m= n>100? 100: (n>1?n-1:1);
-if (m >= n || m <= 0) laerror("illegal mxkrylov");
+if (m > n || m <= 0) laerror("illegal mxkrylov");
 if(result.size()!=n || rhs.size()!=n) laerror("inconsistent vector and matrix size in exptimes");
 const double *v=rhs;
@@ -766,6 +780,9 @@ return;
 #undef FORNAME
 #undef pow_dd
 #endif 
 //EXPOKIT
 //@@@ power series matrix logarithm?
--- a/noncblas.cc
+++ b/noncblas.cc
@@ -186,9 +186,9 @@ void cblas_dgemv(const enum CBLAS_ORDER Order,
                 const double *X, const int incX, const double beta,
                 double *Y, const int incY)
 {
-if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
+if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
 //swap n-m and toggle transposition
-FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
+else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
 }
--- a/t.cc
+++ b/t.cc
@@ -675,9 +675,12 @@ int n;
 double f;
 cin>>n >>f ;
 NRMat<double> a(n,n);
-for(int i=0;i<n;++i) for(int j=0;j<n;++j)
+NRVec<double>u(n),v,w;
 for(int i=0;i<n;++i) 
        {
-        a(i,j)= f*random()/(1.+RAND_MAX);
+	u[i]=f*random()/(1.+RAND_MAX);
 	for(int j=0;j<n;++j)
       		 a(i,j)= f*random()/(1.+RAND_MAX);
        }
 //cout <<"a matrix \n"<<a;
 //cout<<"EXP\n";
@@ -695,6 +698,13 @@ c=exp(a,false);
 cout <<" tricky exp  took "<<clock()/((double) (CLOCKS_PER_SEC))-t<<endl;
 cout<<"error2 = "<<(c-b).norm()/b.norm()<<endl;
 v=b*u;
 t=clock()/((double) (CLOCKS_PER_SEC));
 w=exptimes(a,u);
 cout <<"exptimes  took "<<clock()/((double) (CLOCKS_PER_SEC))-t<<endl;
 cout <<"error of exptimes = "<<(v-w).norm()/v.norm()<<endl;
 }
--- a/vec.cc
+++ b/vec.cc
@@ -450,6 +450,7 @@ void NRVec<double>::gemv(const double beta, const NRMat<double> &A,
 	if ((trans == 'n'?A.ncols():A.nrows()) != x.size())
 		laerror("incompatible sizes in gemv A*x");
 #endif
 	copyonwrite();
 	cblas_dgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans),
 			A.nrows(), A.ncols(), alpha, A, A.ncols(), x.v, 1, beta, v, 1);
 }
@@ -463,6 +464,7 @@ void NRVec< complex<double> >::gemv(const complex<double> beta,
 	if ((trans == 'n'?A.ncols():A.nrows()) != x.size()) 
 		laerror("incompatible sizes in gemv A*x");
 #endif
 	copyonwrite();
 	cblas_zgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans), 
 			A.nrows(), A.ncols(), &alpha, A, A.ncols(), 
 			x.v, 1, &beta, v, 1);
@@ -476,7 +478,7 @@ void NRVec<double>::gemv(const double beta, const NRSMat<double> &A,
 #ifdef DEBUG
 	if (A.ncols()!=x.size()) laerror("incompatible dimension in gemv A*x");
 #endif
-	NRVec<double> result(nn);
+	copyonwrite();
 	cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, x.v, 1, beta, v, 1);
 }
@@ -489,7 +491,7 @@ void NRVec< complex<double> >::gemv(const complex<double> beta,
 #ifdef DEBUG
 	if (A.ncols()!=x.size()) laerror("incompatible dimension in gemv");
 #endif
-	NRVec< complex<double> > result(nn);
+	copyonwrite();
 	cblas_zhpmv(CblasRowMajor, CblasLower, A.ncols(), &alpha, A, 
 			x.v, 1, &beta, v, 1);
 }
--- a/vec.h
+++ b/vec.h
@@ -42,7 +42,8 @@ public:
 	inline NRVec(): nn(0),v(0),count(0){};
 	explicit inline NRVec(const int n) : nn(n), v(new T[n]), count(new int(1)) {};
 	inline NRVec(const T &a, const int n);
-	inline NRVec(const T *a, const int n);
+        inline NRVec(const T *a, const int n);
 	inline NRVec(T *a, const int n, bool skeleton);
 	inline NRVec(const NRVec &rhs);
 	inline explicit NRVec(const NRSMat<T> & S);
 #ifdef MATPTR
@@ -136,10 +137,27 @@ inline NRVec<T>::NRVec(const T& a, const int n) : nn(n), v(new T[n]), count(new
 }
 template <typename T>
-inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), v(new T[n]), count(new int)
+inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), count(new int)
 {
-	*count = 1;
+		v=new T[n];
-	memcpy(v, a, n*sizeof(T));
+		*count = 1;
 		memcpy(v, a, n*sizeof(T));
 }
 template <typename T>
 inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int)
 {
 	if(!skeleton)
 		{
 		v=new T[n];
 		*count = 1;
 		memcpy(v, a, n*sizeof(T));
 		}
 	else
 		{
 		*count = 2;
 		v=a;
 		}
 }
 template <typename T>