diff --git a/mat.cc b/mat.cc index 1951229..924e442 100644 --- a/mat.cc +++ b/mat.cc @@ -19,6 +19,7 @@ template class NRMat; template class NRMat; template class NRMat; template class NRMat; +template class NRMat; template class NRMat; @@ -123,8 +124,10 @@ NRMat & NRMat::operator=(const T &a) if (nn != mm) laerror("RMat.operator=scalar on non-square matrix"); #endif #ifdef MATPTR + memset(v[0],0,nn*nn*sizeof(T)); for (int i=0; i< nn; i++) v[i][i] = a; #else + memset(v,0,nn*nn*sizeof(T)); for (int i=0; i< nn*nn; i+=nn+1) v[i] = a; #endif return *this; @@ -1067,6 +1070,7 @@ INSTANTIZE(short) INSTANTIZE(char) INSTANTIZE(unsigned char) INSTANTIZE(unsigned long) +INSTANTIZE(unsigned int) export template diff --git a/matexp.h b/matexp.h index bd7f6b5..269b0a2 100644 --- a/matexp.h +++ b/matexp.h @@ -8,7 +8,7 @@ #include "la_traits.h" template -const T polynom2(const T &x, const NRVec &c) +const T polynom0(const T &x, const NRVec &c) { int order=c.size()-1; T z,y; @@ -30,13 +30,14 @@ return y; } +//algorithm which minimazes number of multiplications, at the cost of storage template const T polynom(const T &x, const NRVec &c) { int n=c.size()-1; int i,j,k,m=0,t; -if(n<=4) return polynom2(x,c); //here the horner scheme is optimal +if(n<=4) return polynom0(x,c); //here the horner scheme is optimal //first find m which minimizes the number of multiplications j=10*n; @@ -50,6 +51,7 @@ for(i=2;i<=n+1;i++) } } + //allocate array for powers up to m T *xpows = new T[m]; xpows[0]=x; @@ -65,7 +67,10 @@ for(i=0; i<=n/m;i++) { k++; if(k>n) break; - if(j==0) {if(i==0) s=x; /*just to get the dimensions of the matrix*/ s=c[k]; /*create diagonal matrix*/} + if(j==0) { + if(i==0) s=x; /*just to get the dimensions of the matrix*/ + s=c[k]; /*create diagonal matrix*/ + } else LA_traits::axpy(s,xpows[j-1],c[k]); //general s+=xpows[j-1]*c[k]; but more efficient for matrices } @@ -196,6 +201,7 @@ do { while(t*exptaylor[n]>precision);//taylor 0 will terminate in any case + int i; //adjust the coefficients in order to avoid scaling the argument NRVec::elementtype> taylor2(n+1); for(i=0,t=1.;i<=n;i++) @@ -209,14 +215,16 @@ return taylor2; template -const T exp(const T &x) +const T exp(const T &x, const bool simple=false) { int power; //prepare the polynom of and effectively scale T NRVec::elementtype> taylor2=exp_aux(x,power); -T r=polynom(x,taylor2); //for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices + +T r= simple?polynom0(x,taylor2):polynom(x,taylor2); +//for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices //power the result back for(int i=0; i >; template class NRSMat; template class NRSMat; template class NRSMat; +template class NRSMat; template class NRSMat; @@ -80,6 +81,7 @@ template NRSMat & NRSMat::operator=(const T &a) { copyonwrite(); + memset(v,0,NN2*sizeof(T)); for (int i=0; i) INSTANTIZE(int) INSTANTIZE(short) INSTANTIZE(char) +INSTANTIZE(unsigned int) INSTANTIZE(unsigned long)