From d55727cb839a62b2482364236fe09ad236025b7d Mon Sep 17 00:00:00 2001
From: jiri <jiri>
Date: Mon, 11 Sep 2006 23:07:22 +0000
Subject: [PATCH] *** empty log message ***

---
 matexp.h    | 45 +++++++++++++++++++++++++++++++--------------
 noncblas.cc |  4 ++--
 t.cc        | 14 ++++++++++++--
 vec.cc      |  6 ++++--
 vec.h       | 26 ++++++++++++++++++++++----
 5 files changed, 71 insertions(+), 24 deletions(-)
diff --git a/matexp.h b/matexp.h
index 6af3392..35ead29 100644
--- a/matexp.h
+++ b/matexp.h
@@ -248,28 +248,43 @@ return r;
 
 //this simple implementation seems not to be numerically stable enough
 //and probably not efficient either
+//@@@ make more efficient - for nilpotent mat at known power and 
+
 template<class M, class V>
-const V exptimesnaive(const M &mat, V vec) //uses just matrix vector multiplication
+void exptimesdestructive(const M &mat, V &result, V &rhs,  bool transpose=false, const double scale=1. ) //uses just matrix vector multiplication
 {
-if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)vec.size()) laerror("inappropriate sizes in exptimes");
+if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)rhs.size()) laerror("inappropriate sizes in exptimes");
+
 int power;
 //prepare the polynom of and effectively scale the matrix
 NRVec<typename LA_traits<V>::elementtype> taylor2=exp_aux<M,typename LA_traits<V>::elementtype>(mat,power);
+cerr <<"test power "<<power<<endl;
+
+V tmp;
 
-V result(mat.nrows());
 for(int i=1; i<=(1<<power); ++i) //unfortunatelly, here we have to repeat it many times, unlike if the matrix is stored explicitly
 	{
-	if(i>1) vec=result; //apply again to the result of previous application
-	//apply polynom of the matrix to the vector iteratively
-	V y=vec;
-	result=y*taylor2[0];
+	if(i>1) rhs=result; //apply again to the result of previous application
+	else result=rhs;
+	tmp=rhs; //now rhs can be used as scratch	
+	result*=taylor2[0];
 	for(int j=1; j<taylor2.size(); ++j)
 		{
-		y=mat*y;
-		result.axpy(taylor2[j],y);
+		mat.gemv(0.,rhs,transpose?'t':'n',scale,tmp);
+		tmp=rhs;
+		result.axpy(taylor2[j],tmp);
 		}
 	}
 
+return;
+}
+
+
+template<class M, class V>
+const V exptimes(const M &mat, V rhs, bool transpose=false, const double scale=1.)
+{
+V result;
+exptimesdestructive(mat,result,rhs,transpose,scale);
 return result;
 }
 
@@ -279,8 +294,7 @@ return result;
 
 
 
-
-
+#ifdef EXPOKIT
 //this comes from dgexpv from expokit, it had to be translated to C to make a template
 // dgexpv.f -- translated by f2c (version 20030320).
 
@@ -323,13 +337,13 @@ extern "C" void FORNAME(dnchbv)(int *, double *, double *, int *, double *, doub
 
 //partial template specialization leaving still free room for generic matrix type
 template<class M>
-extern void exptimes(const M &mat, NRVec<double> &result, const NRVec<double> &rhs, bool transpose=false, const double t=1., int mxkrylov=0, double tol=1e-14, int mxstep=1000, int mxreject=0, int ideg=6)
+extern void dgexpv(const M &mat, NRVec<double> &result, const NRVec<double> &rhs, bool transpose=false, const double t=1., int mxkrylov=0, double tol=1e-14, int mxstep=1000, int mxreject=0, int ideg=6)
 {
 if(mat.nrows()!= mat.ncols()) laerror("non-square matrix in exptimes");
 int n=mat.nrows();
 int m;
-if(mxkrylov)  m=mxkrylov; else m= n>100? 100: n-1;
-if (m >= n || m <= 0) laerror("illegal mxkrylov");
+if(mxkrylov)  m=mxkrylov; else m= n>100? 100: (n>1?n-1:1);
+if (m > n || m <= 0) laerror("illegal mxkrylov");
 if(result.size()!=n || rhs.size()!=n) laerror("inconsistent vector and matrix size in exptimes");
 
 const double *v=rhs;
@@ -766,6 +780,9 @@ return;
 #undef FORNAME
 #undef pow_dd
 
+#endif 
+//EXPOKIT
+
 
 //@@@ power series matrix logarithm?
 
diff --git a/noncblas.cc b/noncblas.cc
index ccb6954..049157a 100644
--- a/noncblas.cc
+++ b/noncblas.cc
@@ -186,9 +186,9 @@ void cblas_dgemv(const enum CBLAS_ORDER Order,
                  const double *X, const int incX, const double beta,
                  double *Y, const int incY)
 {
-if(Order!=CblasRowMajor) laerror("CblasRowMajor order asserted");
+if(Order!=CblasRowMajor) FORNAME(dgemv) (TransA==CblasNoTrans?"N":"T", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
 //swap n-m and toggle transposition
-FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
+else FORNAME(dgemv) (TransA==CblasNoTrans?"T":"N", &N, &M, &alpha, A, &lda, X, &incX, &beta, Y, &incY );
 }
 
 
diff --git a/t.cc b/t.cc
index 1a18ac0..b97c525 100644
--- a/t.cc
+++ b/t.cc
@@ -675,9 +675,12 @@ int n;
 double f;
 cin>>n >>f ;
 NRMat<double> a(n,n);
-for(int i=0;i<n;++i) for(int j=0;j<n;++j)
+NRVec<double>u(n),v,w;
+for(int i=0;i<n;++i) 
         {
-        a(i,j)= f*random()/(1.+RAND_MAX);
+	u[i]=f*random()/(1.+RAND_MAX);
+	for(int j=0;j<n;++j)
+       		 a(i,j)= f*random()/(1.+RAND_MAX);
         }
 //cout <<"a matrix \n"<<a;
 //cout<<"EXP\n";
@@ -695,6 +698,13 @@ c=exp(a,false);
 cout <<" tricky exp  took "<<clock()/((double) (CLOCKS_PER_SEC))-t<<endl;
 cout<<"error2 = "<<(c-b).norm()/b.norm()<<endl;
 
+v=b*u;
+t=clock()/((double) (CLOCKS_PER_SEC));
+w=exptimes(a,u);
+cout <<"exptimes  took "<<clock()/((double) (CLOCKS_PER_SEC))-t<<endl;
+cout <<"error of exptimes = "<<(v-w).norm()/v.norm()<<endl;
+
+
 }
 
 
diff --git a/vec.cc b/vec.cc
index 1dc802c..39441a6 100644
--- a/vec.cc
+++ b/vec.cc
@@ -450,6 +450,7 @@ void NRVec<double>::gemv(const double beta, const NRMat<double> &A,
 	if ((trans == 'n'?A.ncols():A.nrows()) != x.size())
 		laerror("incompatible sizes in gemv A*x");
 #endif
+	copyonwrite();
 	cblas_dgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans),
 			A.nrows(), A.ncols(), alpha, A, A.ncols(), x.v, 1, beta, v, 1);
 }
@@ -463,6 +464,7 @@ void NRVec< complex<double> >::gemv(const complex<double> beta,
 	if ((trans == 'n'?A.ncols():A.nrows()) != x.size()) 
 		laerror("incompatible sizes in gemv A*x");
 #endif
+	copyonwrite();
 	cblas_zgemv(CblasRowMajor, (trans=='n' ? CblasNoTrans:CblasTrans), 
 			A.nrows(), A.ncols(), &alpha, A, A.ncols(), 
 			x.v, 1, &beta, v, 1);
@@ -476,7 +478,7 @@ void NRVec<double>::gemv(const double beta, const NRSMat<double> &A,
 #ifdef DEBUG
 	if (A.ncols()!=x.size()) laerror("incompatible dimension in gemv A*x");
 #endif
-	NRVec<double> result(nn);
+	copyonwrite();
 	cblas_dspmv(CblasRowMajor, CblasLower, A.ncols(), alpha, A, x.v, 1, beta, v, 1);
 }
 
@@ -489,7 +491,7 @@ void NRVec< complex<double> >::gemv(const complex<double> beta,
 #ifdef DEBUG
 	if (A.ncols()!=x.size()) laerror("incompatible dimension in gemv");
 #endif
-	NRVec< complex<double> > result(nn);
+	copyonwrite();
 	cblas_zhpmv(CblasRowMajor, CblasLower, A.ncols(), &alpha, A, 
 			x.v, 1, &beta, v, 1);
 }
diff --git a/vec.h b/vec.h
index 590031a..43018c4 100644
--- a/vec.h
+++ b/vec.h
@@ -42,7 +42,8 @@ public:
 	inline NRVec(): nn(0),v(0),count(0){};
 	explicit inline NRVec(const int n) : nn(n), v(new T[n]), count(new int(1)) {};
 	inline NRVec(const T &a, const int n);
-	inline NRVec(const T *a, const int n);
+        inline NRVec(const T *a, const int n);
+	inline NRVec(T *a, const int n, bool skeleton);
 	inline NRVec(const NRVec &rhs);
 	inline explicit NRVec(const NRSMat<T> & S);
 #ifdef MATPTR
@@ -136,10 +137,27 @@ inline NRVec<T>::NRVec(const T& a, const int n) : nn(n), v(new T[n]), count(new
 }
 
 template <typename T>
-inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), v(new T[n]), count(new int)
+inline NRVec<T>::NRVec(const T *a, const int n) : nn(n), count(new int)
 {
-	*count = 1;
-	memcpy(v, a, n*sizeof(T));
+		v=new T[n];
+		*count = 1;
+		memcpy(v, a, n*sizeof(T));
+}
+
+template <typename T>
+inline NRVec<T>::NRVec(T *a, const int n, bool skeleton) : nn(n), count(new int)
+{
+	if(!skeleton)
+		{
+		v=new T[n];
+		*count = 1;
+		memcpy(v, a, n*sizeof(T));
+		}
+	else
+		{
+		*count = 2;
+		v=a;
+		}
 }
 
 template <typename T>