#ifndef _MATEXP_H_ #define _MATEXP_H_ //general routine for polynomial of a matrix, tuned to minimize the number //of matrix-matrix multiplications on cost of additions and memory // the polynom and exp routines will work on any type, for which traits class // is defined containing definition of an element type, norm and axpy operation #include "la_traits.h" template const T polynom0(const T &x, const NRVec &c) { int order=c.size()-1; T z,y; //trivial reference implementation by horner scheme if(order==0) {y=x; y=c[0];} //to avoid the problem: we do not know the size of the matrix to contruct a scalar one else { int i; z=x*c[order]; for(i=order-1; i>=0; i--) { if(i const T polynom(const T &x, const NRVec &c) { int n=c.size()-1; int i,j,k,m=0,t; if(n<=4) return polynom0(x,c); //here the horner scheme is optimal //first find m which minimizes the number of multiplications j=10*n; for(i=2;i<=n+1;i++) { t=i-2+2*(n/i)-(n%i)?0:1; if(tn) break; if(j==0) { if(i==0) s=x; /*just to get the dimensions of the matrix*/ s=c[k]; /*create diagonal matrix*/ } else LA_traits::axpy(s,xpows[j-1],c[k]); //general s+=xpows[j-1]*c[k]; but more efficient for matrices } if(i==0) {r=s; f=xpows[m-1];} else { r+= s*f; f=f*xpows[m-1]; } } delete[] xpows; return r; } //for general objects template const T ncommutator ( const T &x, const T &y, int nest=1, const bool right=1) { T z; if(right) {z=x; while(--nest>=0) z=z*y-y*z;} else {z=y; while(--nest>=0) z=x*z-z*x;} return z; } template const T nanticommutator ( const T &x, const T &y, int nest=1, const bool right=1) { T z; if(right) {z=x; while(--nest>=0) z=z*y+y*z;} else {z=y; while(--nest>=0) z=x*z+z*x;} return z; } //general BCH expansion (can be written more efficiently in a specialization for matrices) template const T BCHexpansion (const T &h, const T &t, const int n, const bool verbose=0)\ { T result=h; double factor=1.; T z=h; for(int i=1; i<=n; ++i) { factor/=i; z= z*t-t*z; if(verbose) cerr << "BCH contribution at order "< const T ipow( const T &x, int i) { if(i<0) laerror("negative exponent in ipow"); if(i==0) {T r=x; r=(typename LA_traits::elementtype)1; return r;}//trick for matrix dimension if(i==1) return x; T y,z; z=x; while(!(i&1)) { z = z*z; i >>= 1; } y=z; while((i >>= 1)/*!=0*/) { z = z*z; if(i&1) y = y*z; } return y; } inline int nextpow2(const double n) { const double log2=log(2.); if(n<=.75) return 0; //try to keep the taylor expansion short if(n<=1.) return 1; return int(ceil(log(n)/log2-log(.75))); } template NRVec exp_aux(const T &x, int &power) { //should better be computed by mathematica to have accurate last digits, chebyshev instead, see exp in glibc static double exptaylor[]={ 1., 1., 0.5, 0.1666666666666666666666, 0.0416666666666666666666, 0.0083333333333333333333, 0.0013888888888888888888, 0.00019841269841269841253, 2.4801587301587301566e-05, 2.7557319223985892511e-06, 2.7557319223985888276e-07, 2.5052108385441720224e-08, 2.0876756987868100187e-09, 1.6059043836821613341e-10, 1.1470745597729724507e-11, 7.6471637318198164055e-13, 4.7794773323873852534e-14, 2.8114572543455205981e-15, 1.5619206968586225271e-16, 8.2206352466243294955e-18, 4.1103176233121648441e-19, 0.}; double mnorm= x.norm(); power=nextpow2(mnorm); double scale=exp(-log(2.)*power); //find how long taylor expansion will be necessary const double precision=1e-14; //decreasing brings nothing double s,t; s=mnorm*scale; int n=0; t=1.; do { n++; t*=s; } while(t*exptaylor[n]>precision);//taylor 0 will terminate in any case int i; //adjust the coefficients in order to avoid scaling the argument NRVec taylor2(n+1); for(i=0,t=1.;i<=n;i++) { taylor2[i]=exptaylor[i]*t; t*=scale; } return taylor2; } //it seems that we do not gain anything by polynom vs polynom0, check the m-optimization! template const T exp(const T &x, const bool horner=true) { int power; //prepare the polynom of and effectively scale T NRVec::elementtype> taylor2=exp_aux::elementtype>(x,power); T r= horner?polynom0(x,taylor2):polynom(x,taylor2); //for accuracy summing from the smallest terms up would be better, but this is more efficient for matrices //power the result back for(int i=0; i const V exptimesnaive(const M &mat, V vec) //uses just matrix vector multiplication { if(mat.nrows()!=mat.ncols()||(unsigned int) mat.nrows() != (unsigned int)vec.size()) laerror("inappropriate sizes in exptimes"); int power; //prepare the polynom of and effectively scale the matrix NRVec::elementtype> taylor2=exp_aux::elementtype>(mat,power); V result(mat.nrows()); cerr <<"power = "<1) vec=result; //apply again to the result of previous application //apply polynom of the matrix to the vector iteratively V y=vec; result=y*taylor2[0]; for(int j=1; j extern void exptimes(const M &mat, NRVec &result, bool transpose, const double t, const NRVec &rhs) { if(mat.nrows()!= mat.ncols()) laerorr("non-square matrix in exptimes"); n=mat.nrows(); if(result.size()!=n || rhs.size()!=n) laerorr("inconsistent vector and matrix size in exptimes"); // dgexpv.f -- translated by f2c (version 20030320). int dgexpv(int n, int m, double t, double *v, double *w, double tol, double *anorm, double *wsp, int *lwsp, int *iwsp, int *liwsp, int *itrace, int mxstep=1000, int mxreject=0, int ideg=6) { static const double c_b4 = 1.; static const int c__1 = 1; static const double c_b8 = 10.; static const double c_b25 = 0.; int iflag; /* System generated locals */ int i__1, i__2; double d__1; double d_sign(double *, double *), pow_di(double *, int *), pow_dd(double *, double *), d_lg10(double *); int i_dnnt(double *); double d_int(double *); /* Local variables */ int ibrkflag; double step_min__, step_max__; int i__, j; double break_tol__; int k1; double p1, p2, p3; int ih, mh, iv, ns, mx; double xm; int j1v; double hij, sgn, eps, hj1j, sqr1, beta; extern double ddot_(int *, double *, int *, double *, int *); double hump; extern double dnrm2_(int *, double *, int *); extern /* Subroutine */ int dscal_(int *, double *, double *, int *); int ifree, lfree; double t_old__; extern /* Subroutine */ int dgemv_(char *, int *, int *, double *, double *, int *, double *, int *, double *, double *, int *, ftnlen); int iexph; double t_new__; extern /* Subroutine */ int dcopy_(int *, double *, int *, double *, int *); int nexph; extern /* Subroutine */ int daxpy_(int *, double *, double *, int *, double *, int *); double t_now__; int nstep; double t_out__; int nmult; double vnorm; extern "C" void FORNAME(dgpadm)(const int *, int *, double *, double *, int *, double *, int *, int *, int *, int *, int *) extern "C" void FORNAME(dnchbv)(int *, double *, double *, int *, double *, double *); int nscale; double rndoff, t_step__, avnorm; int ireject; double err_loc__; int nreject, mbrkdwn; double tbrkdwn, s_error__, x_error__; /* -----Purpose----------------------------------------------------------| */ /* --- DGEXPV computes w = exp(t*A)*v - for a General matrix A. */ /* It does not compute the matrix exponential in isolation but */ /* instead, it computes directly the action of the exponential */ /* operator on the operand vector. This way of doing so allows */ /* for addressing large sparse problems. */ /* The method used is based on Krylov subspace projection */ /* techniques and the matrix under consideration interacts only */ /* via the external routine `matvec' performing the matrix-vector */ /* product (matrix-free method). */ /* -----Arguments--------------------------------------------------------| */ /* n : (input) order of the principal matrix A. */ /* m : (input) maximum size for the Krylov basis. */ /* t : (input) time at wich the solution is needed (can be < 0). */ /* v(n) : (input) given operand vector. */ /* w(n) : (output) computed approximation of exp(t*A)*v. */ /* tol : (input/output) the requested accuracy tolerance on w. */ /* If on input tol=0.0d0 or tol is too small (tol.le.eps) */ /* the internal value sqrt(eps) is used, and tol is set to */ /* sqrt(eps) on output (`eps' denotes the machine epsilon). */ /* (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol) */ /* anorm : (input) an approximation of some norm of A. */ /* wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1 */ /* +---------+-------+---------------+ */ /* (actually, ideg=6) V H wsp for PADE */ /* iwsp(liwsp): (workspace) liwsp .ge. m+2 */ /* matvec : external subroutine for matrix-vector multiplication. */ /* synopsis: matvec( x, y ) */ /* double precision x(*), y(*) */ /* computes: y(1:n) <- A*x(1:n) */ /* where A is the principal matrix. */ /* itrace : (input) running mode. 0=silent, 1=print step-by-step info */ /* iflag : (output) exit flag. */ /* <0 - bad input arguments */ /* 0 - no problem */ /* 1 - maximum number of steps reached without convergence */ /* 2 - requested tolerance was too high */ /* -----Accounts on the computation--------------------------------------| */ /* Upon exit, an interested user may retrieve accounts on the */ /* computations. They are located in wsp and iwsp as indicated below: */ /* location mnemonic description */ /* -----------------------------------------------------------------| */ /* iwsp(1) = nmult, number of matrix-vector multiplications used */ /* iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated */ /* iwsp(3) = nscale, number of repeated squaring involved in Pade */ /* iwsp(4) = nstep, number of integration steps used up to completion */ /* iwsp(5) = nreject, number of rejected step-sizes */ /* iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise */ /* iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured */ /* -----------------------------------------------------------------| */ /* wsp(1) = step_min, minimum step-size used during integration */ /* wsp(2) = step_max, maximum step-size used during integration */ /* wsp(3) = dummy */ /* wsp(4) = dummy */ /* wsp(5) = x_error, maximum among all local truncation errors */ /* wsp(6) = s_error, global sum of local truncation errors */ /* wsp(7) = tbrkdwn, if `happy breakdown', time when it occured */ /* wsp(8) = t_now, integration domain successfully covered */ /* wsp(9) = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0) */ /* wsp(10) = ||w||/||v||, scaled norm of the solution w. */ /* -----------------------------------------------------------------| */ /* The `hump' is a measure of the conditioning of the problem. The */ /* matrix exponential is well-conditioned if hump = 1, whereas it is */ /* poorly-conditioned if hump >> 1. However the solution can still be */ /* relatively fairly accurate even when the hump is large (the hump */ /* is an upper bound), especially when the hump and the scaled norm */ /* of w [this is also computed and returned in wsp(10)] are of the */ /* same order of magnitude (further details in reference below). */ /* ----------------------------------------------------------------------| */ /* -----The following parameters may also be adjusted herein-------------| */ /* mxstep = 1000, . mxreject = 0, . ideg = 6, */ /* mxstep : maximum allowable number of integration steps. */ /* The value 0 means an infinite number of steps. */ /* mxreject: maximum allowable number of rejections at each step. */ /* The value 0 means an infinite number of rejections. */ /* ideg : the Pade approximation of type (ideg,ideg) is used as */ /* an approximation to exp(H). The value 0 switches to the */ /* uniform rational Chebyshev approximation of type (14,14) */ /* delta : local truncation error `safety factor' */ /* gamma : stepsize `shrinking factor' */ /* ----------------------------------------------------------------------| */ /* Roger B. Sidje (rbs@maths.uq.edu.au) */ /* EXPOKIT: Software Package for Computing Matrix Exponentials. */ /* ACM - Transactions On Mathematical Software, 24(1):130-156, 1998 */ /* ----------------------------------------------------------------------| */ /* --- check restrictions on input parameters ... */ /* Parameter adjustments */ --w; --v; --wsp; --iwsp; /* Function Body */ iflag = 0; /* Computing 2nd power */ i__1 = m + 2; if (*lwsp < n * (m + 2) + i__1 * i__1 * 5 + ideg + 1) { iflag = -1; } if (*liwsp < m + 2) { iflag = -2; } if (m >= n || m <= 0) { iflag = -3; } if (iflag != 0) { la_error("bad sizes (in input of DGEXPV)"); } /* --- initialisations ... */ k1 = 2; mh = m + 2; iv = 1; ih = iv + n * (m + 1) + n; ifree = ih + mh * mh; lfree = *lwsp - ifree + 1; ibrkflag = 0; mbrkdwn = m; nmult = 0; nreject = 0; nexph = 0; nscale = 0; t_out__ = abs(t); tbrkdwn = 0.; step_min__ = t_out__; step_max__ = 0.; nstep = 0; s_error__ = 0.; x_error__ = 0.; t_now__ = 0.; t_new__ = 0.; p1 = 1.3333333333333333; L1: p2 = p1 - 1.; p3 = p2 + p2 + p2; eps = (d__1 = p3 - 1., abs(d__1)); if (eps == 0.) { goto L1; } if (tol <= eps) { tol = sqrt(eps); } rndoff = eps * *anorm; break_tol__ = 1e-7; /* >>> break_tol = tol */ /* >>> break_tol = anorm*tol */ sgn = d_sign(&c_b4, t); dcopy_(n, &v[1], &c__1, &w[1], &c__1); beta = dnrm2_(n, &w[1], &c__1); vnorm = beta; hump = beta; /* --- obtain the very first stepsize ... */ sqr1 = sqrt(.1); xm = 1. / (double) (m); d__1 = (m + 1) / 2.72; i__1 = m + 1; p1 = tol * pow_di(&d__1, &i__1) * sqrt((m + 1) * 6.2800000000000002); d__1 = p1 / (beta * 4. * *anorm); t_new__ = 1. / *anorm * pow_dd(&d__1, &xm); d__1 = d_lg10(&t_new__) - sqr1; i__1 = i_dnnt(&d__1) - 1; p1 = pow_di(&c_b8, &i__1); d__1 = t_new__ / p1 + .55; t_new__ = d_int(&d__1) * p1; /* --- step-by-step integration ... */ L100: if (t_now__ >= t_out__) { goto L500; } ++nstep; /* Computing MIN */ d__1 = t_out__ - t_now__; t_step__ = min(d__1,t_new__); p1 = 1. / beta; i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { wsp[iv + i__ - 1] = p1 * w[i__]; } i__1 = mh * mh; for (i__ = 1; i__ <= i__1; ++i__) { wsp[ih + i__ - 1] = 0.; } /* --- Arnoldi loop ... */ j1v = iv + n; i__1 = m; for (j = 1; j <= i__1; ++j) { ++nmult; (matvec)(&wsp[j1v - n], &wsp[j1v]); i__2 = j; for (i__ = 1; i__ <= i__2; ++i__) { hij = ddot_(n, &wsp[iv + (i__ - 1) * n], &c__1, &wsp[j1v], &c__1) ; d__1 = -hij; daxpy_(n, &d__1, &wsp[iv + (i__ - 1) * n], &c__1, &wsp[j1v], & c__1); wsp[ih + (j - 1) * mh + i__ - 1] = hij; } hj1j = dnrm2_(n, &wsp[j1v], &c__1); /* --- if `happy breakdown' go straightforward at the end ... */ if (hj1j <= break_tol__) { /* print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j */ k1 = 0; ibrkflag = 1; mbrkdwn = j; tbrkdwn = t_now__; t_step__ = t_out__ - t_now__; goto L300; } wsp[ih + (j - 1) * mh + j] = hj1j; d__1 = 1. / hj1j; dscal_(n, &d__1, &wsp[j1v], &c__1); j1v += n; /* L200: */ } ++nmult; (matvec)(&wsp[j1v - n], &wsp[j1v]); avnorm = dnrm2_(n, &wsp[j1v], &c__1); /* --- set 1 for the 2-corrected scheme ... */ L300: wsp[ih + m * mh + m + 1] = 1.; /* --- loop while ireject p2 * 10.) { err_loc__ = p2; xm = 1. / (double) (m); } else if (p1 > p2) { err_loc__ = p1 * p2 / (p1 - p2); xm = 1. / (double) (m); } else { err_loc__ = p1; xm = 1. / (double) (m - 1); } } /* --- reject the step-size if the error is not acceptable ... */ if (k1 != 0 && err_loc__ > t_step__ * 1.2 * tol && (mxreject == 0 || ireject < mxreject)) { t_old__ = t_step__; d__1 = t_step__ * tol / err_loc__; t_step__ = t_step__ * .9 * pow_dd(&d__1, &xm); d__1 = d_lg10(&t_step__) - sqr1; i__1 = i_dnnt(&d__1) - 1; p1 = pow_di(&c_b8, &i__1); d__1 = t_step__ / p1 + .55; t_step__ = d_int(&d__1) * p1; if (*itrace != 0) { /* print*,'t_step =',t_old */ /* print*,'err_loc =',err_loc */ /* print*,'err_required =',delta*t_old*tol */ /* print*,'stepsize rejected, stepping down to:',t_step */ } ++ireject; ++nreject; if (mxreject != 0 && ireject > mxreject) { iflag = 2; laerror("Failure in DGEXPV: The requested tolerance is too high. Rerun with a smaller value."); } goto L401; } /* --- now update w = beta*V*exp(t_step*H)*e1 and the hump ... */ /* Computing MAX */ i__1 = 0, i__2 = k1 - 1; mx = mbrkdwn + max(i__1,i__2); dgemv_("n", n, &mx, &beta, &wsp[iv], n, &wsp[iexph], &c__1, &c_b25, &w[1], &c__1, (ftnlen)1); beta = dnrm2_(n, &w[1], &c__1); hump = max(hump,beta); /* --- suggested value for the next stepsize ... */ d__1 = t_step__ * tol / err_loc__; t_new__ = t_step__ * .9 * pow_dd(&d__1, &xm); d__1 = d_lg10(&t_new__) - sqr1; i__1 = i_dnnt(&d__1) - 1; p1 = pow_di(&c_b8, &i__1); d__1 = t_new__ / p1 + .55; t_new__ = d_int(&d__1) * p1; err_loc__ = max(err_loc__,rndoff); /* --- update the time covered ... */ t_now__ += t_step__; /* --- display and keep some information ... */ if (*itrace != 0) { /* print*,'integration',nstep,'---------------------------------' */ /* print*,'scale-square =',ns */ /* print*,'step_size =',t_step */ /* print*,'err_loc =',err_loc */ /* print*,'next_step =',t_new */ } step_min__ = min(step_min__,t_step__); step_max__ = max(step_max__,t_step__); s_error__ += err_loc__; x_error__ = max(x_error__,err_loc__); if (mxstep == 0 || nstep < mxstep) { goto L100; } iflag = 1; L500: /* iwsp(1) = nmult */ /* iwsp(2) = nexph */ /* iwsp(3) = nscale */ /* iwsp(4) = nstep */ /* iwsp(5) = nreject */ /* iwsp(6) = ibrkflag */ /* iwsp(7) = mbrkdwn */ /* wsp(1) = step_min */ /* wsp(2) = step_max */ /* wsp(3) = 0.0d0 */ /* wsp(4) = 0.0d0 */ /* wsp(5) = x_error */ /* wsp(6) = s_error */ /* wsp(7) = tbrkdwn */ /* wsp(8) = sgn*t_now */ /* wsp(9) = hump/vnorm */ /* wsp(10) = beta/vnorm */ if(iflag) laerror("dgexpv error"); return; } return; } //@@@ power series matrix logarithm? #endif