/* LA: linear algebra C++ interface library Copyright (C) 2008 Jiri Pittner or This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef _MATEXP_H_ #define _MATEXP_H_ //general routine for polynomial of a matrix, tuned to minimize the number //of matrix-matrix multiplications on cost of additions and memory // the polynom and exp routines will work on any type, for which traits class // is defined containing definition of an element type, norm and axpy operation #include "la_traits.h" #include "laerror.h" #ifndef NONCBLAS extern "C" { #include "cblas.h" #include "clapack.h" } #else #include "noncblas.h" #endif template const T polynom0(const T &x, const NRVec &c) { int order=c.size()-1; T z,y; //trivial reference implementation by horner scheme if(order==0) {y=x; y=c[0];} //to avoid the problem: we do not know the size of the matrix to contruct a scalar one else { int i; z=x*c[order]; for(i=order-1; i>=0; i--) { if(i const T polynom(const T &x, const NRVec &c) { int n=c.size()-1; int i,j,k,m=0,t; if(n<=4) return polynom0(x,c); //here the horner scheme is optimal //first find m which minimizes the number of multiplications j=10*n; for(i=2;i<=n+1;i++) { t=i-2+2*(n/i)-(n%i)?0:1; if(tn) break; if(j==0) { if(i==0) s=x; /*just to get the dimensions of the matrix*/ s=c[k]; /*create diagonal matrix*/ } else LA_traits::axpy(s,xpows[j-1],c[k]); //general s+=xpows[j-1]*c[k]; but more efficient for matrices } if(i==0) {r=s; f=xpows[m-1];} else { r+= s*f; f=f*xpows[m-1]; } } delete[] xpows; return r; } //for general objects template const T ncommutator ( const T &x, const T &y, int nest=1, const bool right=1) { T z; if(right) {z=x; while(--nest>=0) z=z*y-y*z;} else {z=y; while(--nest>=0) z=x*z-z*x;} return z; } template const T nanticommutator ( const T &x, const T &y, int nest=1, const bool right=1) { T z; if(right) {z=x; while(--nest>=0) z=z*y+y*z;} else {z=y; while(--nest>=0) z=x*z+z*x;} return z; } //general BCH expansion (can be written more efficiently in a specialization for matrices) template const T BCHexpansion (const T &h, const T &t, const int n, const bool verbose=0)\ { T result=h; double factor=1.; T z=h; for(int i=1; i<=n; ++i) { factor/=i; z= z*t-t*z; if(verbose) cerr << "BCH contribution at order "< const T ipow( const T &x, int i) { if(i<0) laerror("negative exponent in ipow"); if(i==0) {T r=x; r=(typename LA_traits::elementtype)1; return r;}//trick for matrix dimension if(i==1) return x; T y,z; z=x; while(!(i&1)) { z = z*z; i >>= 1; } y=z; while((i >>= 1)/*!=0*/) { z = z*z; if(i&1) y = y*z; } return y; } inline int nextpow2(const double n) { const double log2=log(2.); if(n<=.75) return 0; //try to keep the taylor expansion short if(n<=1.) return 1; return int(ceil(log(n)/log2-log(.75))); } template NRVec exp_aux(const T &x, int &power,int maxpower= -1, int maxtaylor= -1) { //should better be computed by mathematica to have accurate last digits, chebyshev instead, see exp in glibc static double exptaylor[]={ 1., 1., 0.5, 0.1666666666666666666666, 0.0416666666666666666666, 0.0083333333333333333333, 0.0013888888888888888888, 0.00019841269841269841253, 2.4801587301587301566e-05, 2.7557319223985892511e-06, 2.7557319223985888276e-07, 2.5052108385441720224e-08, 2.0876756987868100187e-09, 1.6059043836821613341e-10, 1.1470745597729724507e-11, 7.6471637318198164055e-13, 4.7794773323873852534e-14, 2.8114572543455205981e-15, 1.5619206968586225271e-16, 8.2206352466243294955e-18, 4.1103176233121648441e-19, 0.}; double mnorm= x.norm(); power=nextpow2(mnorm); if(maxpower>=0 && power>maxpower) power=maxpower; double scale=exp(-log(2.)*power); //find how long taylor expansion will be necessary const double precision=1e-14; //decreasing brings nothing double s,t; s=mnorm*scale; int n=0; t=1.; do { n++; t*=s; } while(t*exptaylor[n]>precision);//taylor 0 will terminate in any case if(maxtaylor>=0 && n>maxtaylor) n=maxtaylor; //useful e.g. if the matrix is nilpotent in order n+1 as the CC T operator for n electrons int i; //adjust the coefficients in order to avoid scaling the argument NRVec taylor2(n+1); for(i=0,t=1.;i<=n;i++) { taylor2[i]=exptaylor[i]*t; t*=scale; } return taylor2; } //it seems that we do not gain anything by polynom vs polynom0, check the m-optimization! template const T exp(const T &x, bool horner=true, int maxpower= -1, int maxtaylor= -1 ) { int power; //prepare the polynom of and effectively scale T NRVec::elementtype> taylor2=exp_aux::elementtype>(x,power,maxpower,maxtaylor); T r= horner?polynom0(x,taylor2):polynom(x,taylor2); //for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices //power the result back for(int i=0; i void exptimesdestructive(const M &mat, V &result, V &rhs, bool transpose=false, const double scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false) //uses just matrix vector multiplication { if(mat_is_0) {result = rhs; LA_traits::copyonwrite(result); return;} //prevent returning a shallow copy of rhs if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)rhs.size()) laerror("inappropriate sizes in exptimes"); int power; //prepare the polynom of and effectively scale the matrix NRVec::elementtype> taylor2=exp_aux::elementtype>(mat,power,maxpower,maxtaylor); V tmp; for(int i=1; i<=(1<1) rhs=result; //apply again to the result of previous application else result=rhs; tmp=rhs; //now rhs can be used as scratch result*=taylor2[0]; for(int j=1; j const V exptimes(const M &mat, V rhs, bool transpose=false, const double scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false ) { V result; exptimesdestructive(mat,result,rhs,transpose,scale,maxpower,maxtaylor,mat_is_0); return result; } //@@@ power series matrix logarithm? #endif