/* LA: linear algebra C++ interface library Copyright (C) 2008 Jiri Pittner or This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #ifndef _MATEXP_H_ #define _MATEXP_H_ //general routine for polynomial of a matrix, tuned to minimize the number //of matrix-matrix multiplications on cost of additions and memory // the polynom and exp routines will work on any type, for which traits class // is defined containing definition of an element type, norm and axpy operation #include "la_traits.h" #include "laerror.h" #include namespace LA { template const T polynom0(const T &x, const NRVec &c) { int order=c.size()-1; T z,y; //trivial reference implementation by horner scheme if(order==0) {y=x; y=c[0];} //to avoid the problem: we do not know the size of the matrix to contruct a scalar one else { int i; z=x*c[order]; for(i=order-1; i>=0; i--) { //std::cerr<<"TEST polynom0 "<::deallocate(z); z=y*x;} //for large matrices avoid storing 4 ones simultaneously LA_traits::deallocate(y); y=z+c[i]; } } return y; } //algorithm which minimazes number of multiplications, at the cost of storage template const T polynom(const T &x, const NRVec &c) { int n=c.size()-1; int i,j,k,m=0,t; if(n<=4) return polynom0(x,c); //here the horner scheme is optimal //first find m which minimizes the number of multiplications j=10*n; for(i=2;i<=n+1;i++) { t=i-2+2*(n/i)-(n%i)?0:1; if(tn) break; if(j==0) { if(i==0) s=x; /*just to get the dimensions of the matrix*/ s=c[k]; /*create diagonal matrix*/ } else LA_traits::axpy(s,xpows[j-1],c[k]); //general s+=xpows[j-1]*c[k]; but more efficient for matrices } if(i==0) {r=s; f=xpows[m-1];} else { r+= s*f; f=f*xpows[m-1]; } } delete[] xpows; return r; } //for general objects template const T ncommutator ( const T &x, const T &y, int nest=1, const bool right=1) { T z; if(right) {z=x; while(--nest>=0) z=z*y-y*z;} else {z=y; while(--nest>=0) z=x*z-z*x;} return z; } template const T nanticommutator ( const T &x, const T &y, int nest=1, const bool right=1) { T z; if(right) {z=x; while(--nest>=0) z=z*y+y*z;} else {z=y; while(--nest>=0) z=x*z+z*x;} return z; } //general BCH expansion (can be written more efficiently in a specialization for matrices) template const T BCHexpansion (const T &h, const T &t, const int nmax, const bool verbose=0, const bool right=1) { T result=h; double factor=1.; T z=h; for(int i=1; i<=nmax; ++i) { factor/=i; if(right) z= z*t-t*z; else z= t*z-z*t; if(verbose) std::cerr << "BCH contribution at order "< = H |x> + [H,T] |x> + 1/2 [[H,T],T] |x> +1/3! [[[H,T],T],T] |x> + ... (for right=1) //defining C_n^j(x) = [...[H,T],...T]_n T^j |x> //we precompute C_0^j(x) = H T^j |x> up to j=nmax //and then recursively C_n^j(x) = C_{n-1}^{j+1}(x) - T C_{n-1}^j(x) //we overwrite C_{n-1} by C_n in place and free the last one to save memory //and accumulate the final results on the fly //for left, C_0^j(x) remains same //definition is C_n^j(x) = [T,...[T,H]]_n T^j |x> //and the recursion is C_n^j(x) = T C_{n-1}^j(x) - C_{n-1}^{j+1}(x) template const V BCHtimes(const M1 &H, const char transH, const M2 &T, const char transT, const V &x, const int nmax, const bool right=1) { double factor=1.; NRVec c(nmax+1); c[0]=x; for(int j=1; j<=nmax; ++j) { c[j].resize(x.size()); T.gemv(0,c[j],transT,1,c[j-1]); } for(int j=0; j<=nmax; ++j) { V tmp(x.size()); H.gemv(0,tmp,transH,1,c[j]); c[j]=tmp; } V result = c[0]; for(int i=1; i<=nmax; ++i) { //recursive step in the dummy n index of c, overwriting on the fly for(int j=0; j<=nmax-i; ++j) { V tmp = c[j+1]; if(right) T.gemv(1,tmp,transT,-1,c[j]); else T.gemv(-1,tmp,transT,1,c[j]); c[j] = tmp; } c[nmax-i+1].resize(0); //free unnecessary memory //accumulate the series factor/=i; result += c[0] * factor; } return result; } template const T ipow( const T &x, int i) { if(i<0) laerror("negative exponent in ipow"); if(i==0) {T r=x; r=(typename LA_traits::elementtype)1; return r;}//trick for matrix dimension if(i==1) return x; T y,z; z=x; while(!(i&1)) { z = z*z; i >>= 1; } y=z; while((i >>= 1)/*!=0*/) { z = z*z; if(i&1) y = y*z; } return y; } inline int mynextpow2(const double n) { const double log2=std::log(2.); if(n<=.75) return 0; //try to keep the taylor expansion short if(n<=1.) return 1; return int(std::ceil(std::log(n)/log2-std::log(.75))); } //should better be computed by mathematica to have accurate last digits, perhaps chebyshev instead, see exp in glibc //is shared also for sine and cosine now static const double exptaylor[]={ 1., 1., 0.5, 0.1666666666666666666666, 0.0416666666666666666666, 0.0083333333333333333333, 0.0013888888888888888888, 0.00019841269841269841253, 2.4801587301587301566e-05, 2.7557319223985892511e-06, 2.7557319223985888276e-07, 2.5052108385441720224e-08, 2.0876756987868100187e-09, 1.6059043836821613341e-10, 1.1470745597729724507e-11, 7.6471637318198164055e-13, 4.7794773323873852534e-14, 2.8114572543455205981e-15, 1.5619206968586225271e-16, 8.2206352466243294955e-18, 4.1103176233121648441e-19, 1.9572941063391262595e-20, 0.}; //S is element type of T, but T may be any user-defined template NRVec exp_aux(const T &x, int &power, int maxpower, int maxtaylor, S prescale) { double mnorm= x.norm() * std::abs(prescale); power=mynextpow2(mnorm); if(maxpower>=0 && power>maxpower) power=maxpower; double scale=std::exp(-std::log(2.)*power); //find how long taylor expansion will be necessary const double precision=1e-14; //further decreasing brings nothing double s,t; s=mnorm*scale; int n=0; t=1.; do { n++; t*=s; } while(t*exptaylor[n]>precision);//taylor 0 will terminate in any case if(maxtaylor>=0 && n>maxtaylor) n=maxtaylor; //useful e.g. if the matrix is nilpotent in order n+1 as the CC T operator for n electrons int i; //adjust the coefficients in order to avoid scaling the argument NRVec taylor2(n+1); for(i=0,t=1.;i<=n;i++) { taylor2[i]=exptaylor[i]*t; t*=scale; } //std::cout <<"TEST power, scale "< void sincos_aux(NRVec &si, NRVec &co, const T &x, int &power,int maxpower, int maxtaylor, const S prescale) { double mnorm= x.norm() * std::abs(prescale); power=mynextpow2(mnorm); if(maxpower>=0 && power>maxpower) power=maxpower; double scale=std::exp(-std::log(2.)*power); //find how long taylor expansion will be necessary const double precision=1e-14; //further decreasing brings nothing double s,t; s=mnorm*scale; int n=0; t=1.; do { n++; t*=s; } while(t*exptaylor[n]>precision);//taylor 0 will terminate in any case if(maxtaylor>=0 && n>maxtaylor) n=maxtaylor; //useful e.g. if the matrix is nilpotent in order n+1 as the CC T operator for n electrons if((n&1)==0) ++n; //force it to be odd to have same length in sine and cosine si.resize((n+1)/2); co.resize((n+1)/2); int i; //adjust the coefficients in order to avoid scaling the argument for(i=0,t=1.;i<=n;i++) { if(i&1) si[i>>1] = exptaylor[i]* (i&2?-t:t); else co[i>>1] = exptaylor[i]* (i&2?-t:t); t*=scale; } //std::cout <<"TEST sin "< const T exp(const T &x, bool horner=true, int maxpower= -1, int maxtaylor= -1 ) { int power; //prepare the polynom of and effectively scale T NRVec::elementtype> taylor2=exp_aux::elementtype,double>(x,power,maxpower,maxtaylor,1.); //std::cerr <<"TEST power "< void sincos(T &s, T &c, const T &x, bool horner=true, int maxpower= -1, int maxtaylor= -1 ) { int power; NRVec::normtype> taylors,taylorc; sincos_aux::normtype>(taylors,taylorc,x,power,maxpower,maxtaylor,1.); //could we save something by computing both polynoms simultaneously? { T x2 = x*x; s = horner?polynom0(x2,taylors):polynom(x2,taylors); c = horner?polynom0(x2,taylorc):polynom(x2,taylorc); } s = s * x; //power the results back for(int i=0; i NRMat > expi(const NRMat &x) { NRMat r,i; sincos(i,r,x); return complexify(r,i); } //this simple implementation seems not to be numerically stable enough //and probably not efficient either template void exptimesdestructive(const M &mat, V &result, V &rhs, bool transpose, const MEL scale, int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false) //uses just matrix vector multiplication { if(mat_is_0) {result = rhs; LA_traits::copyonwrite(result); return;} //prevent returning a shallow copy of rhs if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)rhs.size()) laerror("inappropriate sizes in exptimes"); int power; //prepare the polynom of and effectively scale the matrix NRVec::normtype> taylor2=exp_aux::normtype>(mat,power,maxpower,maxtaylor,scale); V tmp; bool washere=0; for(int i=1; i<=(1<1) rhs=result; //apply again to the result of previous application else result=rhs; tmp=rhs; //now rhs can be used as scratch result*=taylor2[0]; for(int j=1; j const V exptimes(const M &mat, V rhs, bool transpose=false, const typename LA_traits::elementtype scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false ) { V result; exptimesdestructive(mat,result,rhs,transpose,scale,maxpower,maxtaylor,mat_is_0); return result; } template const V exptimesreal(const M &mat, V rhs, bool transpose=false, const typename LA_traits::normtype scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false ) { V result; exptimesdestructive(mat,result,rhs,transpose,scale,maxpower,maxtaylor,mat_is_0); return result; } template void sincostimes_simple(const M &mat, V &si, V &co, const V &rhs, const NRVec::normtype> &taylors, const NRVec::normtype> &taylorc, bool transpose, const S scale) { si=rhs * taylors[0]; co=rhs * taylorc[0]; V tmp=rhs; for(int j=1; j void sincostimes_aux(const M &mat, V &si, V &co, const V &rhs, const NRVec::normtype> &taylors, const NRVec::normtype> &taylorc, bool transpose, const S scale, int power) { if(power==0) sincostimes_simple(mat,si,co,rhs,taylors,taylorc,transpose,scale); else { V si2,co2; //no large memory allocated yet - size 0 sincostimes_aux(mat,si2,co2,rhs,taylors,taylorc,transpose,scale,power-1); sincostimes_aux(mat,si,co,co2,taylors,taylorc,transpose,scale,power-1); V ss,cs; sincostimes_aux(mat,ss,cs,si2,taylors,taylorc,transpose,scale,power-1); co -= ss; si += cs; } } //inefficient, it is better to use complex exptimes! //again scale should actually be elementtype of M which is inaccessible template void sincostimes(const M &mat, V &si, V &co, const V &rhs, bool transpose=false, const typename LA_traits::normtype scale=1., int maxpower= -1, int maxtaylor= -1, bool mat_is_0=false) //uses just matrix vector multiplication { if(mat_is_0) //prevent returning a shallow copy of rhs { co = rhs; LA_traits::copyonwrite(co); LA_traits::clearme(si); return; } if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)rhs.size()) laerror("inappropriate sizes in sincostimes"); //prepare the polynom of and effectively scale the matrix int power; NRVec::normtype> taylors,taylorc; sincos_aux::normtype>(taylors,taylorc,mat,power,maxpower,maxtaylor,scale); if(taylors.size()!=taylorc.size()) laerror("internal error - same size of sin and cos expansions assumed"); //the actual computation and resursive "squaring" //std::cout <<"TEST power "<