diff --git a/matexp.h b/matexp.h index 4b7f7f1..854e8ee 100644 --- a/matexp.h +++ b/matexp.h @@ -309,498 +309,6 @@ return result; - - -#ifdef EXPOKIT -//this comes from dgexpv from expokit, it had to be translated to C to make a template -// dgexpv.f -- translated by f2c (version 20030320). - -static double dipow(double x,int i) -{ -double y=1.0; -if(x==0.0) - { - if(i>0) return(0.0); - if(i==0) laerror("dipow: 0^0"); - return(1./x); - } -if(i) { - if(i<0) {i= -i; x= 1.0/x;} - do - { - if(i&1) y *= x; - x *= x; - } - while((i >>= 1)/*!=0*/); - } -return(y); -} - -static int nint(double x) -{ -int sgn= x>=0 ? 1 : -1; -return sgn * (int)(abs(x)+.5); -} - - -#ifdef FORTRAN_ -#define FORNAME(x) x##_ -#else -#define FORNAME(x) x -#endif - -extern "C" void FORNAME(dgpadm)(const int *, int *, double *, double *, int *, double *, int *, int *, int *, int *, int *); -extern "C" void FORNAME(dnchbv)(int *, double *, double *, int *, double *, double *); - -//partial template specialization leaving still free room for generic matrix type -template -extern void dgexpv(const M &mat, NRVec &result, const NRVec &rhs, bool transpose=false, const double t=1., int mxkrylov=0, double tol=1e-14, int mxstep=1000, int mxreject=0, int ideg=6) -{ -if(mat.nrows()!= mat.ncols()) laerror("non-square matrix in exptimes"); -int n=mat.nrows(); -int m; -if(mxkrylov) m=mxkrylov; else m= n>100? 100: (n>1?n-1:1); -if (m > n || m <= 0) laerror("illegal mxkrylov"); -if(result.size()!=n || rhs.size()!=n) laerror("inconsistent vector and matrix size in exptimes"); - -const double *v=rhs; -double *w=result; -double anorm=mat.norm(); - - -int iflag; - - /* System generated locals */ - int i__1, i__2; - double d__1; - -#define pow_dd(x,y) (exp(y*log(x))) - - /* Local variables */ - int ibrkflag; - double step_min__, step_max__; - int i__, j; - double break_tol__; - int k1; - double p1, p2, p3; - int ih, mh, iv, ns, mx; - double xm; - int j1v; - double hij, sgn, eps, hj1j, sqr1, beta; - double hump; - int ifree, lfree; - double t_old__; - int iexph; - double t_new__; - int nexph; - double t_now__; - int nstep; - double t_out__; - int nmult; - double vnorm; - int nscale; - double rndoff, t_step__, avnorm; - int ireject; - double err_loc__; - int nreject, mbrkdwn; - double tbrkdwn, s_error__, x_error__; - -/* -----Purpose----------------------------------------------------------| */ - -/* --- DGEXPV computes w = exp(t*A)*v - for a General matrix A. */ - -/* It does not compute the matrix exponential in isolation but */ -/* instead, it computes directly the action of the exponential */ -/* operator on the operand vector. This way of doing so allows */ -/* for addressing large sparse problems. */ - -/* The method used is based on Krylov subspace projection */ -/* techniques and the matrix under consideration interacts only */ -/* via the external routine `matvec' performing the matrix-vector */ -/* product (matrix-free method). */ - -/* -----Arguments--------------------------------------------------------| */ - -/* n : (input) order of the principal matrix A. */ - -/* m : (input) maximum size for the Krylov basis. */ - -/* t : (input) time at wich the solution is needed (can be < 0). */ - -/* v(n) : (input) given operand vector. */ - -/* w(n) : (output) computed approximation of exp(t*A)*v. */ - -/* tol : (input/output) the requested accuracy tolerance on w. */ -/* If on input tol=0.0d0 or tol is too small (tol.le.eps) */ -/* the internal value sqrt(eps) is used, and tol is set to */ -/* sqrt(eps) on output (`eps' denotes the machine epsilon). */ -/* (`Happy breakdown' is assumed if h(j+1,j) .le. anorm*tol) */ - -/* anorm : (input) an approximation of some norm of A. */ - -/* wsp(lwsp): (workspace) lwsp .ge. n*(m+1)+n+(m+2)^2+4*(m+2)^2+ideg+1 */ -/* +---------+-------+---------------+ */ -/* (actually, ideg=6) V H wsp for PADE */ - -/* iwsp(liwsp): (workspace) liwsp .ge. m+2 */ - -/* matvec : external subroutine for matrix-vector multiplication. */ -/* synopsis: matvec( x, y ) */ -/* double precision x(*), y(*) */ -/* computes: y(1:n) <- A*x(1:n) */ -/* where A is the principal matrix. */ - - -/* iflag : (output) exit flag. */ -/* <0 - bad input arguments */ -/* 0 - no problem */ -/* 1 - maximum number of steps reached without convergence */ -/* 2 - requested tolerance was too high */ - -/* -----Accounts on the computation--------------------------------------| */ -/* Upon exit, an interested user may retrieve accounts on the */ -/* computations. They are located in wsp and iwsp as indicated below: */ - -/* location mnemonic description */ -/* -----------------------------------------------------------------| */ -/* iwsp(1) = nmult, number of matrix-vector multiplications used */ -/* iwsp(2) = nexph, number of Hessenberg matrix exponential evaluated */ -/* iwsp(3) = nscale, number of repeated squaring involved in Pade */ -/* iwsp(4) = nstep, number of integration steps used up to completion */ -/* iwsp(5) = nreject, number of rejected step-sizes */ -/* iwsp(6) = ibrkflag, set to 1 if `happy breakdown' and 0 otherwise */ -/* iwsp(7) = mbrkdwn, if `happy brkdown', basis-size when it occured */ -/* -----------------------------------------------------------------| */ -/* wsp(1) = step_min, minimum step-size used during integration */ -/* wsp(2) = step_max, maximum step-size used during integration */ -/* wsp(3) = dummy */ -/* wsp(4) = dummy */ -/* wsp(5) = x_error, maximum among all local truncation errors */ -/* wsp(6) = s_error, global sum of local truncation errors */ -/* wsp(7) = tbrkdwn, if `happy breakdown', time when it occured */ -/* wsp(8) = t_now, integration domain successfully covered */ -/* wsp(9) = hump, i.e., max||exp(sA)||, s in [0,t] (or [t,0] if t<0) */ -/* wsp(10) = ||w||/||v||, scaled norm of the solution w. */ -/* -----------------------------------------------------------------| */ -/* The `hump' is a measure of the conditioning of the problem. The */ -/* matrix exponential is well-conditioned if hump = 1, whereas it is */ -/* poorly-conditioned if hump >> 1. However the solution can still be */ -/* relatively fairly accurate even when the hump is large (the hump */ -/* is an upper bound), especially when the hump and the scaled norm */ -/* of w [this is also computed and returned in wsp(10)] are of the */ -/* same order of magnitude (further details in reference below). */ - -/* ----------------------------------------------------------------------| */ -/* -----The following parameters may also be adjusted herein-------------| */ - -/* mxstep = 1000, . mxreject = 0, . ideg = 6, */ -/* mxstep : maximum allowable number of integration steps. */ -/* The value 0 means an infinite number of steps. */ - -/* mxreject: maximum allowable number of rejections at each step. */ -/* The value 0 means an infinite number of rejections. */ - -/* ideg : the Pade approximation of type (ideg,ideg) is used as */ -/* an approximation to exp(H). The value 0 switches to the */ -/* uniform rational Chebyshev approximation of type (14,14) */ - -/* delta : local truncation error `safety factor' */ - -/* gamma : stepsize `shrinking factor' */ - -/* ----------------------------------------------------------------------| */ -/* Roger B. Sidje (rbs@maths.uq.edu.au) */ -/* EXPOKIT: Software Package for Computing Matrix Exponentials. */ -/* ACM - Transactions On Mathematical Software, 24(1):130-156, 1998 */ -/* ----------------------------------------------------------------------| */ - iflag = 0; - - i__1 = m + 2; - int lwsp= n * (m + 2) + i__1 * i__1 * 5 + ideg + 1; - double *wsp = new double[lwsp]; - int *iwsp = new int[m+2]; - - - /* Parameter adjustments */ - --w; - --v; - --wsp; - --iwsp; - - -/* --- initialisations ... */ - - k1 = 2; - mh = m + 2; - iv = 1; - ih = iv + n * (m + 1) + n; - ifree = ih + mh * mh; - lfree = lwsp - ifree + 1; - ibrkflag = 0; - mbrkdwn = m; - nmult = 0; - nreject = 0; - nexph = 0; - nscale = 0; - t_out__ = abs(t); - tbrkdwn = 0.; - step_min__ = t_out__; - step_max__ = 0.; - nstep = 0; - s_error__ = 0.; - x_error__ = 0.; - t_now__ = 0.; - t_new__ = 0.; - p1 = 1.3333333333333333; -L1: - p2 = p1 - 1.; - p3 = p2 + p2 + p2; - eps = (d__1 = p3 - 1., abs(d__1)); - if (eps == 0.) { - goto L1; - } - if (tol <= eps) { - tol = sqrt(eps); - } - rndoff = eps * anorm; - break_tol__ = 1e-7; -/* >>> break_tol = tol */ -/* >>> break_tol = anorm*tol */ - sgn = t>=0?1. : -1.; - cblas_dcopy(n, &v[1], 1, &w[1], 1); - beta = cblas_dnrm2(n, &w[1], 1); - vnorm = beta; - hump = beta; - -/* --- obtain the very first stepsize ... */ - - sqr1 = sqrt(.1); - xm = 1. / (double) (m); - d__1 = (m + 1) / 2.72; - i__1 = m + 1; - p1 = tol * dipow(d__1, i__1) * sqrt((m + 1) * 6.28); - d__1 = p1 / (beta * 4. * anorm); - t_new__ = 1. / anorm * pow_dd(d__1, xm); - d__1 = log10(t_new__) - sqr1; - i__1 = nint(d__1) - 1; - p1 = dipow(10., i__1); - d__1 = t_new__ / p1 + .55; - t_new__ = floor(d__1) * p1; - -/* --- step-by-step integration ... */ - -L100: - if (t_now__ >= t_out__) { - goto L500; - } - ++nstep; -/* Computing MIN */ - d__1 = t_out__ - t_now__; - t_step__ = min(d__1,t_new__); - p1 = 1. / beta; - i__1 = n; - for (i__ = 1; i__ <= i__1; ++i__) { - wsp[iv + i__ - 1] = p1 * w[i__]; - } - i__1 = mh * mh; - for (i__ = 1; i__ <= i__1; ++i__) { - wsp[ih + i__ - 1] = 0.; - } - -/* --- Arnoldi loop ... */ - - j1v = iv + n; - i__1 = m; - for (j = 1; j <= i__1; ++j) { - ++nmult; - { - NRVec res(&wsp[j1v],n,true); - NRVec rhs(&wsp[j1v - n],n,true); - mat.gemv(0.,res,transpose?'t':'n',1.,rhs); - } - i__2 = j; - for (i__ = 1; i__ <= i__2; ++i__) { - hij = cblas_ddot(n, &wsp[iv + (i__ - 1) * n], 1, &wsp[j1v], 1); - d__1 = -hij; - cblas_daxpy(n, d__1, &wsp[iv + (i__ - 1) * n], 1, &wsp[j1v], 1); - wsp[ih + (j - 1) * mh + i__ - 1] = hij; - } - hj1j = cblas_dnrm2(n, &wsp[j1v], 1); -/* --- if `happy breakdown' go straightforward at the end ... */ - if (hj1j <= break_tol__) { -/* print*,'happy breakdown: mbrkdwn =',j,' h =',hj1j */ - k1 = 0; - ibrkflag = 1; - mbrkdwn = j; - tbrkdwn = t_now__; - t_step__ = t_out__ - t_now__; - goto L300; - } - wsp[ih + (j - 1) * mh + j] = hj1j; - d__1 = 1. / hj1j; - cblas_dscal(n, d__1, &wsp[j1v], 1); - j1v += n; -/* L200: */ - } - ++nmult; - { - NRVec res(&wsp[j1v],n,true); - NRVec rhs(&wsp[j1v - n],n,true); - mat.gemv(0.,res,transpose?'t':'n',1.,rhs); - } - avnorm = cblas_dnrm2(n, &wsp[j1v], 1); - -/* --- set 1 for the 2-corrected scheme ... */ - -L300: - wsp[ih + m * mh + m + 1] = 1.; - -/* --- loop while ireject p2 * 10.) { - err_loc__ = p2; - xm = 1. / (double) (m); - } else if (p1 > p2) { - err_loc__ = p1 * p2 / (p1 - p2); - xm = 1. / (double) (m); - } else { - err_loc__ = p1; - xm = 1. / (double) (m - 1); - } - } - -/* --- reject the step-size if the error is not acceptable ... */ - - if (k1 != 0 && err_loc__ > t_step__ * 1.2 * tol && (mxreject == 0 || - ireject < mxreject)) { - t_old__ = t_step__; - d__1 = t_step__ * tol / err_loc__; - t_step__ = t_step__ * .9 * pow_dd(d__1, xm); - d__1 = log10(t_step__) - sqr1; - i__1 = nint(d__1) - 1; - p1 = dipow(10., i__1); - d__1 = t_step__ / p1 + .55; - t_step__ = floor(d__1) * p1; -#ifdef DEBUG -/* print*,'t_step =',t_old */ -/* print*,'err_loc =',err_loc */ -/* print*,'err_required =',delta*t_old*tol */ -/* print*,'stepsize rejected, stepping down to:',t_step */ -#endif - ++ireject; - ++nreject; - if (mxreject != 0 && ireject > mxreject) { - iflag = 2; - laerror("Failure in DGEXPV: The requested tolerance is too high. Rerun with a smaller value."); - } - goto L401; - } - -/* --- now update w = beta*V*exp(t_step*H)*e1 and the hump ... */ - -/* Computing MAX */ - i__1 = 0, i__2 = k1 - 1; - mx = mbrkdwn + max(i__1,i__2); - cblas_dgemv(CblasRowMajor, CblasTrans, n, mx, beta, &wsp[iv], n, &wsp[iexph], 1, 0., &w[1],1); - beta = cblas_dnrm2(n, &w[1], 1); - hump = max(hump,beta); - -/* --- suggested value for the next stepsize ... */ - - d__1 = t_step__ * tol / err_loc__; - t_new__ = t_step__ * .9 * pow_dd(d__1, xm); - d__1 = log10(t_new__) - sqr1; - i__1 = nint(d__1) - 1; - p1 = dipow(10., i__1); - d__1 = t_new__ / p1 + .55; - t_new__ = floor(d__1) * p1; - err_loc__ = max(err_loc__,rndoff); - -/* --- update the time covered ... */ - - t_now__ += t_step__; - -/* --- display and keep some information ... */ - -#ifdef DEBUG -/* print*,'integration',nstep,'---------------------------------' */ -/* print*,'scale-square =',ns */ -/* print*,'step_size =',t_step */ -/* print*,'err_loc =',err_loc */ -/* print*,'next_step =',t_new */ -#endif - step_min__ = min(step_min__,t_step__); - step_max__ = max(step_max__,t_step__); - s_error__ += err_loc__; - x_error__ = max(x_error__,err_loc__); - if (mxstep == 0 || nstep < mxstep) { - goto L100; - } - iflag = 1; -L500: -/* iwsp(1) = nmult */ -/* iwsp(2) = nexph */ -/* iwsp(3) = nscale */ -/* iwsp(4) = nstep */ -/* iwsp(5) = nreject */ -/* iwsp(6) = ibrkflag */ -/* iwsp(7) = mbrkdwn */ -/* wsp(1) = step_min */ -/* wsp(2) = step_max */ -/* wsp(3) = 0.0d0 */ -/* wsp(4) = 0.0d0 */ -/* wsp(5) = x_error */ -/* wsp(6) = s_error */ -/* wsp(7) = tbrkdwn */ -/* wsp(8) = sgn*t_now */ -/* wsp(9) = hump/vnorm */ -/* wsp(10) = beta/vnorm */ - if(iflag) laerror("dgexpv error"); -delete[] ++wsp; delete[] ++iwsp; -return; -} -#undef FORNAME -#undef pow_dd - -#endif -//EXPOKIT - - //@@@ power series matrix logarithm? #endif